solid_events 0.1.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +53 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +406 -10
  5. data/Rakefile +9 -5
  6. data/app/controllers/solid_events/api_controller.rb +782 -0
  7. data/app/controllers/solid_events/application_controller.rb +4 -0
  8. data/app/controllers/solid_events/incidents_controller.rb +38 -0
  9. data/app/controllers/solid_events/saved_views_controller.rb +43 -0
  10. data/app/controllers/solid_events/traces_controller.rb +731 -0
  11. data/app/helpers/solid_events/traces_helper.rb +79 -0
  12. data/app/jobs/solid_events/evaluate_incidents_job.rb +11 -0
  13. data/app/jobs/solid_events/prune_job.rb +26 -0
  14. data/app/models/solid_events/causal_edge.rb +9 -0
  15. data/app/models/solid_events/error_link.rb +11 -0
  16. data/app/models/solid_events/event.rb +11 -0
  17. data/app/models/solid_events/incident.rb +68 -0
  18. data/app/models/solid_events/incident_event.rb +13 -0
  19. data/app/models/solid_events/journey.rb +62 -0
  20. data/app/models/solid_events/record.rb +11 -0
  21. data/app/models/solid_events/record_link.rb +11 -0
  22. data/app/models/solid_events/saved_view.rb +11 -0
  23. data/app/models/solid_events/summary.rb +11 -0
  24. data/app/models/solid_events/trace.rb +85 -0
  25. data/app/views/layouts/solid_events/_style.html.erb +39 -0
  26. data/app/views/layouts/solid_events/application.html.erb +21 -14
  27. data/app/views/solid_events/incidents/events.html.erb +60 -0
  28. data/app/views/solid_events/traces/hot_path.html.erb +63 -0
  29. data/app/views/solid_events/traces/index.html.erb +532 -0
  30. data/app/views/solid_events/traces/show.html.erb +216 -0
  31. data/app/views/solid_events/traces/timeline.html.erb +54 -0
  32. data/config/locales/en.yml +4 -0
  33. data/config/routes.rb +35 -0
  34. data/db/migrate/20260216010000_create_solid_events_tables.rb +51 -0
  35. data/db/migrate/20260216020000_create_solid_events_summaries.rb +33 -0
  36. data/db/migrate/20260216030000_add_dimensions_to_solid_events_summaries.rb +20 -0
  37. data/db/migrate/20260216040000_add_request_id_to_solid_events_summaries.rb +8 -0
  38. data/db/migrate/20260216050000_add_sql_metrics_to_solid_events_summaries.rb +8 -0
  39. data/db/migrate/20260216060000_add_deploy_dimensions_to_solid_events_summaries.rb +17 -0
  40. data/db/migrate/20260216070000_create_solid_events_incidents.rb +30 -0
  41. data/db/migrate/20260216080000_add_schema_version_to_solid_events_summaries.rb +7 -0
  42. data/db/migrate/20260216090000_add_assignment_and_mute_to_solid_events_incidents.rb +12 -0
  43. data/db/migrate/20260216100000_add_resolution_metadata_to_solid_events_incidents.rb +11 -0
  44. data/db/migrate/20260216110000_add_assignment_audit_to_solid_events_incidents.rb +10 -0
  45. data/db/migrate/20260216120000_create_solid_events_saved_views.rb +17 -0
  46. data/db/migrate/20260216130000_create_solid_events_incident_events.rb +19 -0
  47. data/db/migrate/20260216140000_add_incident_event_lookup_indexes.rb +8 -0
  48. data/db/migrate/20260216150000_add_causal_links_to_solid_events.rb +15 -0
  49. data/db/migrate/20260216160000_create_solid_events_journeys_and_causal_edges.rb +45 -0
  50. data/lib/generators/solid_events/install/USAGE +8 -0
  51. data/lib/generators/solid_events/install/install_generator.rb +26 -0
  52. data/lib/generators/solid_events/install/templates/config/initializers/solid_events.rb +84 -0
  53. data/lib/generators/solid_events/install/templates/db/events_schema.rb +206 -0
  54. data/lib/solid_events/benchmark.rb +43 -0
  55. data/lib/solid_events/configuration.rb +167 -0
  56. data/lib/solid_events/context_scraper.rb +23 -0
  57. data/lib/solid_events/controller_tracing.rb +94 -0
  58. data/lib/solid_events/current.rb +15 -0
  59. data/lib/solid_events/engine.rb +93 -0
  60. data/lib/solid_events/incident_evaluator.rb +327 -0
  61. data/lib/solid_events/labeler.rb +21 -0
  62. data/lib/solid_events/notifiers/slack_webhook_notifier.rb +36 -0
  63. data/lib/solid_events/subscribers/action_cable_subscriber.rb +48 -0
  64. data/lib/solid_events/subscribers/controller_subscriber.rb +39 -0
  65. data/lib/solid_events/subscribers/enqueue_subscriber.rb +30 -0
  66. data/lib/solid_events/subscribers/error_subscriber.rb +107 -0
  67. data/lib/solid_events/subscribers/external_http_subscriber.rb +54 -0
  68. data/lib/solid_events/subscribers/job_subscriber.rb +45 -0
  69. data/lib/solid_events/subscribers/mailer_subscriber.rb +49 -0
  70. data/lib/solid_events/subscribers/sql_subscriber.rb +46 -0
  71. data/lib/solid_events/tracer.rb +672 -0
  72. data/lib/solid_events/version.rb +3 -1
  73. data/lib/solid_events.rb +210 -3
  74. data/lib/tasks/solid_events_tasks.rake +30 -4
  75. metadata +141 -28
  76. data/MIT-LICENSE +0 -20
  77. data/app/assets/config/solid_events_manifest.js +0 -1
  78. data/app/assets/stylesheets/solid_events/application.css +0 -15
  79. data/app/helpers/solid_events/application_helper.rb +0 -4
  80. data/app/jobs/solid_events/application_job.rb +0 -4
  81. data/app/mailers/solid_events/application_mailer.rb +0 -6
  82. data/app/models/solid_events/application_record.rb +0 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d2910788b8d97669bb5910944897e09bfa345049dbcc7a202c9bb15b166e6ec0
4
- data.tar.gz: a14e9651f663a73bdcea5013c05b44e4a54cbbb458ff5b6682be50440ee166ae
3
+ metadata.gz: 3f2d62fad5143f048bc02f3021bc0849a5863d8c931fb926034ae442e1a57403
4
+ data.tar.gz: 202f753a7e7f83c8db71d3f48f7571a2344b4c04de4f684d48b2e2a484c09b21
5
5
  SHA512:
6
- metadata.gz: 851ca865d64afdec2002085d8ab208540a05231ad4bf18c14ddd0857a643c5e28fe24d6955689409bafeb94cba6e23465b75202100862d66edf5dbba4bc6adbd
7
- data.tar.gz: 44c2b338e60e0b0234ef83d9226256d83afb09e6c6c2d1181888c170194c8c308b5089508f69937fd5c5a8643e89a2a477a24ad4b227dcbae77c22d69dd1317d
6
+ metadata.gz: dd96a8f3e9cb0893117565c678ec952b402ecf633958130e7b7192b262d9c36a6850078ab5d691f5dbed280966bd878823a36ea1d67a9ebdb2344d26999e3af6
7
+ data.tar.gz: 47ffcd56ca06c8ea42d83e8cf569d9e1ec173de0b07293bd7f9d0af272c720183ef13b8a9e50268bfc38ca4ef72df8be06c11f23f0ae931b5b0fbddb671bf73f
data/CHANGELOG.md ADDED
@@ -0,0 +1,53 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented in this file.
4
+
5
+ ## [v0.2.4] - 2026-03-11
6
+
7
+ - Switched installer guidance to schema-first events setup (`db/events_schema.rb`) to match solid_errors-style deployment flow.
8
+ - Removed migration-copy install flow from generator/tasks to avoid schema + replay drift in app installs.
9
+
10
+ ## [v0.2.3] - 2026-03-11
11
+
12
+ - Aligned installer/task conventions with sibling Solid gems: use `events` database key, `db/events_schema.rb`, and `db/events_migrate`.
13
+ - Kept v0.2.2 migration idempotency fixes for schema-first `db:prepare` workflows.
14
+
15
+ ## [v0.2.2] - 2026-03-11
16
+
17
+ - Fixed installer production config to use `config.solid_events.connects_to = { database: { writing: :events } }` so generated config matches common `database.yml` naming.
18
+ - Made installer-copied event migrations idempotent (`column_exists?`/`index_exists?`/`table_exists?` guards) to prevent duplicate table/column/index failures when `db:prepare` loads `db/events_schema.rb` before migrations.
19
+
20
+ ## [v0.2.1] - 2026-03-04
21
+
22
+ - Added GitHub CI workflow to run the test suite on push/pull request.
23
+ - Fixed dummy test app bootstrapping by disabling `maintain_test_schema` to prevent pending-migration failures in clean test runs.
24
+ - Improved local release workflow compatibility by adding a repo-level `cleo.yml`.
25
+ - Added QA bootstrap files (`.github/pull_request_template.md` and `.github/workflows/qa.yml`) for consistent quality workflow setup.
26
+ - Updated README formatting by removing emoji-prefixed headings and feature bullets.
27
+ - Updated gem version to `0.2.1`.
28
+
29
+ ## [v0.2.0] - 2026-02-16
30
+
31
+ - Expanded tracing beyond controller/job/SQL into Action Cable, mailer, and external HTTP spans with async causal links across request/job chains.
32
+ - Established canonical wide-event summaries (`solid_events_summaries`) with schema versioning, HTTP/timing/SQL/error/entity dimensions, and deploy/service metadata.
33
+ - Added journey/session story modeling with first-class journey records and timeline-focused narrative support.
34
+ - Improved correlation and explainability with record links, state diffs, causal edges, fingerprint grouping, and Solid Errors linking heuristics.
35
+ - Added incident management for observability use cases: detection (new fingerprints, spikes, p95, SLO burn), dedupe windows, lifecycle transitions, assignment/muting, and auto-recovery.
36
+ - Shipped observability APIs with token auth, cursor pagination, comparative metrics, aggregate endpoints, incident evidence/context payloads, and JSON exports.
37
+ - Upgraded investigation UI with richer trace filters, trace details, hot-path/regression views, saved views, compare mode, incident lifecycle pages, and journey drilldowns.
38
+ - Added signal-quality controls: default noise suppression for internal namespaces, allowlist overrides, tail sampling, context/payload redaction policies, truncation guards, and canonical JSON log emission.
39
+ - Added operational support: isolated DB install flow, pruning/evaluation jobs, benchmark utilities, and expanded Minitest coverage across tracer/subscribers/API/controllers/jobs.
40
+ - Fixed summary availability checks to avoid sticky false caching when the summaries table is temporarily unavailable, so subsequent traces can create canonical summaries once storage is ready.
41
+ - Fixed summary consistency by ensuring all error-link attachment paths (including reconciliation flows) refresh the trace summary, keeping `summary.error_count` aligned with persisted error links.
42
+ - Added tracer regressions for summary-availability retry behavior and summary error-count synchronization after reconciliation.
43
+ - Updated ignored generated test artifacts to exclude additional dummy SQLite output under `test/dummy/storage`.
44
+ - Updated development lockfile metadata for Ruby `4.0.1`.
45
+
46
+ ## [v0.1.0] - 2026-02-16
47
+
48
+ - Initial release of `solid_events`.
49
+ - Added Rails engine install generator and schema setup.
50
+ - Added request, SQL, and Active Job tracing.
51
+ - Added record linking and Solid Errors linking.
52
+ - Added trace dashboard UI with filtering, pagination, and trace details.
53
+ - Added configuration for ignore rules, retention, and DB connection selection.
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Solid Events
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md CHANGED
@@ -1,28 +1,424 @@
1
1
  # SolidEvents
2
- Short description and motivation.
3
2
 
4
- ## Usage
5
- How to use my plugin.
3
+ **The "Context Graph" for Rails Applications.**
4
+
5
+ `SolidEvents` is a zero-configuration, database-backed observability engine for Rails 8+. It automatically unifies system tracing (Controller/SQL), business events, and record linkages into a single, queryable SQL interface.
6
+
7
+ By storing traces in your own database (PostgreSQL/SQLite), `SolidEvents` eliminates the need for expensive external observability tools (Datadog, New Relic) while enabling deeper, context-aware AI debugging.
8
+
9
+ ## Scope
10
+
11
+ `SolidEvents` is strictly for observability and incident state:
12
+
13
+ - Detect incidents from canonical event/tracing data
14
+ - Store traces, events, summaries, and incident lifecycle state
15
+ - Expose that data through UI and APIs for humans and tools
16
+ - Manage incident state transitions (acknowledge, resolve, reopen, assign, mute)
17
+
18
+ It does **not** execute automation workflows (code fixes, PR creation, QA runs). That belongs in `solid_agents`.
19
+
20
+ ---
21
+
22
+ ## The "Why"
23
+
24
+ ### 1. Logs are Ephemeral, Decisions are Permanent
25
+
26
+ Traditional logs vanish. `SolidEvents` treats your application's execution history as **Business Data**. It captures the **Trace**—the exact sequence of decisions, logic branches, and data mutations—that led to a final state.
27
+
28
+ ### 2. The Context Graph (Zero Config)
29
+
30
+ Most bugs are impossible to fix because you lack context.
31
+
32
+ - _Error:_ "Payment Failed."
33
+ - _Missing Context:_ "This request was triggered by User #5, and it attempted to create Order #99."
34
+ - _SolidEvents Solution:_ We **automatically** link the **Trace** (User Actions) to the **Record** (Order #99) and to the **Error** (SolidErrors).
35
+
36
+ ### 3. Owned Infrastructure
37
+
38
+ Stop renting your data.
39
+
40
+ - **Zero Monthly Cost:** No per-GB ingestion fees.
41
+ - **Privacy:** No PII leaves your server.
42
+ - **SQL Power:** Debug your app using standard SQL queries.
43
+
44
+ ---
45
+
46
+ ## Features
47
+
48
+ - **Auto-Instrumentation:** Automatically captures Controller Actions, Active Job executions, and SQL queries via `ActiveSupport::Notifications`.
49
+ - **Auto-Linking:** Automatically detects when an ActiveRecord model is created or updated during a request and links it to the Trace (e.g., `Order.create` -> linked to the Trace).
50
+ - **Auto-Labeling:** Intelligently maps controller actions to business terms (e.g., `OrdersController#create` becomes `order.created`).
51
+ - **Context Scraping:** Automatically detects `current_user`, `current_account`, or `tenant_id` from your controllers and tags the trace.
52
+ - **Canonical Wide Events:** Maintains one summary row per trace with outcome, entity, HTTP, timing, and correlation fields for fast filtering.
53
+ - **Stable Schema Versioning:** Canonical events include `schema_version` for agent-safe parsing across upgrades.
54
+ - **Tail Sampling:** Keeps all failures and slow traces, then samples low-value successes by configurable rate.
55
+ - **Deploy-Aware Dimensions:** Captures service/environment/version/deployment/region on every canonical trace.
56
+ - **PII Redaction:** Redacts sensitive context/payload keys before persisting events and emitting logs.
57
+ - **Payload Size Guards:** Truncates oversized context/event payloads using configurable limits.
58
+ - **Path-Based Redaction:** Supports exact field-path redaction rules in addition to key matching.
59
+ - **Wide-Event Primary Mode:** Optionally skip sub-event row persistence while keeping canonical trace summaries complete.
60
+ - **Retention Tiers:** Keep success traces, error traces, and incidents for different durations.
61
+ - **Consumer APIs:** JSON endpoints for incidents and canonical traces at `/solid_events/api/...`.
62
+ - **Compare Mode:** UI + API support for window-over-window error-rate and latency comparisons.
63
+ - **Journey Sequences:** UI panel + API to reconstruct request/entity trace sequences for story-first debugging.
64
+ - **Timeline View:** Ordered cross-trace timeline for request/entity investigations.
65
+ - **Incident Timeline Markers:** Timeline view includes incident lifecycle milestones.
66
+ - **Saved Views:** Persist and re-apply investigation filters directly from the traces dashboard.
67
+ - **Shared View Links:** Generate immutable shared-view URLs from saved filters for team handoff.
68
+ - **API Token Auth:** Optional token protection for all `/solid_events/api/*` endpoints.
69
+ - **JSON Export:** Export filtered traces/incidents as JSON snapshots for handoff and auditing.
70
+ - **Rails 8 Native:** Built on top of the new [Rails 8 Event Reporter API](https://api.rubyonrails.org/classes/ActiveSupport/EventReporter.html) and `SolidQueue` standards.
71
+
72
+ ---
6
73
 
7
74
  ## Installation
75
+
8
76
  Add this line to your application's Gemfile:
9
77
 
10
78
  ```ruby
11
79
  gem "solid_events"
12
80
  ```
13
81
 
14
- And then execute:
82
+ Then run the installer:
83
+
15
84
  ```bash
16
- $ bundle
85
+ rails generate solid_events:install
86
+ rails db:migrate
17
87
  ```
18
88
 
19
- Or install it yourself as:
89
+ If your app uses an isolated `events` database, rely on `db/events_schema.rb` (schema-first) and run:
90
+
20
91
  ```bash
21
- $ gem install solid_events
92
+ rails db:prepare
22
93
  ```
23
94
 
95
+ ### Recommended: SolidErrors
96
+
97
+ For a complete "Autonomous Reliability" stack, install `solid_errors`. `SolidEvents` will automatically detect it and link Traces to Errors.
98
+
99
+ ```ruby
100
+ gem "solid_errors"
101
+ ```
102
+
103
+ ---
104
+
105
+ ## Zero-Configuration Behavior
106
+
107
+ Once installed, `SolidEvents` starts working immediately. You do **not** need to change your code.
108
+
109
+ ### 1. Automatic Record Linking
110
+
111
+ When your app creates data, we link it.
112
+
113
+ ```ruby
114
+ # Your existing code
115
+ def create
116
+ @order = Order.create(params) # <-- SolidEvents automatically links this Order ID to the current Trace
117
+ end
118
+ ```
119
+
120
+ ### 2. Automatic Business Events
121
+
122
+ We automatically label controller actions with semantic names in the Dashboard:
123
+
124
+ | Controller Action | Auto-Label |
125
+ | :------------------------------ | :-------------- |
126
+ | `OrdersController#create` (201) | `order.created` |
127
+ | `UsersController#update` (200) | `user.updated` |
128
+ | `SessionsController#destroy` | `session.ended` |
129
+
130
+ ### 3. Automatic Context
131
+
132
+ If your controller has a `current_user` method (Devise/standard pattern), we automatically capture the `user_id` and add it to the Trace Context.
133
+
134
+ ---
135
+
136
+ ## Configuration
137
+
138
+ We provide sane defaults (ignoring internal Rails tables), but you can tune exactly what gets tracked.
139
+
140
+ ```ruby
141
+ # config/initializers/solid_events.rb
142
+ SolidEvents.configure do |config|
143
+ # 1. Database Isolation (Recommended)
144
+ # Prevents logging writes from slowing down your main application.
145
+ config.connects_to = { database: { writing: :events } }
146
+
147
+ # 2. Privacy & Noise Control
148
+ # We automatically ignore SolidQueue, SolidCache, ActionMailbox, etc.
149
+ # Add your own internal models here:
150
+ config.ignore_models = [
151
+ "Ahoy::Event",
152
+ "AuditLog"
153
+ ]
154
+
155
+ # 3. Path Filtering
156
+ # Don't log health checks or assets
157
+ config.ignore_paths = ["/up", "/health", "/assets"]
158
+
159
+ # 4. Namespace filtering (applies to model links, SQL, and job traces)
160
+ # Defaults already include: solid_events, solid_errors, solid_queue,
161
+ # solid_cache, solid_cable, active_storage, action_text
162
+ config.ignore_namespaces << "paper_trail"
163
+ config.allow_sql_tables << "noticed_notifications" # re-enable one table
164
+ config.allow_job_prefixes << "job.active_storage" # re-enable if needed
165
+
166
+ # 5. Retention Policy
167
+ # Auto-delete logs older than 30 days
168
+ config.retention_period = 30.days
169
+
170
+ # 6. Tail Sampling (canonical wide-event style)
171
+ # Keep all errors/slow traces, sample the rest.
172
+ config.sample_rate = 0.2
173
+ config.tail_sample_slow_ms = 1000
174
+ config.always_sample_context_keys = ["release", "request_id"]
175
+ config.always_sample_when = ->(trace:, context:, duration_ms:) { context["tenant_id"].present? }
176
+
177
+ # 7. Emit one JSON line per sampled trace
178
+ config.emit_canonical_log_line = true
179
+
180
+ # 8. Deployment dimensions for cross-release debugging
181
+ config.service_name = "anywaye"
182
+ config.environment_name = Rails.env
183
+ config.service_version = ENV["APP_VERSION"]
184
+ config.deployment_id = ENV["DEPLOYMENT_ID"]
185
+ config.region = ENV["APP_REGION"]
186
+
187
+ # 9. Redaction policy
188
+ config.sensitive_keys += ["customer_email", "phone_number"]
189
+ config.redaction_paths = {
190
+ "payment.card.number" => "[REDACTED_CARD]",
191
+ "user.ssn" => true
192
+ }
193
+ config.redaction_placeholder = "[FILTERED]"
194
+ config.max_context_payload_bytes = 16_384
195
+ config.max_event_payload_bytes = 8_192
196
+ config.payload_truncation_placeholder = "[TRUNCATED]"
197
+
198
+ # 10. Feature slice dimensions captured into canonical payloads
199
+ config.feature_slice_keys = %w[feature_flag experiment release_channel plan]
200
+
201
+ # 11. Wide-event primary mode
202
+ config.wide_event_primary = true
203
+ config.persist_sub_events = false
204
+
205
+ # 12. Retention tiers (separate durations for success traces, error traces, and incidents)
206
+ config.retention_period = 30.days
207
+ config.error_retention_period = 90.days
208
+ config.incident_retention_period = 180.days
209
+
210
+ # 13. Optional Slack incident notifier
211
+ # config.incident_notifier = SolidEvents::Notifiers::SlackWebhookNotifier.new(
212
+ # webhook_url: ENV.fetch("SOLID_EVENTS_SLACK_WEBHOOK_URL"),
213
+ # channel: "#incidents"
214
+ # )
215
+ end
216
+ ```
217
+
218
+ ### High-Signal Logging Without Disabling Rails Logs
219
+
220
+ When `config.emit_canonical_log_line` is enabled, `SolidEvents` emits one canonical JSON line per sampled trace, so teams can rely on stable, queryable events while keeping the default Rails logs enabled.
221
+
222
+ ### Add Business Context During Execution
223
+
224
+ You can enrich the current trace with product-specific dimensions from controllers, jobs, or services:
225
+
226
+ ```ruby
227
+ SolidEvents.annotate!(
228
+ plan: current_account.plan_name,
229
+ cart_value_cents: @cart.total_cents,
230
+ checkout_experiment: "checkout_v3"
231
+ )
232
+ ```
233
+
234
+ ### Agent-Friendly APIs
235
+
236
+ The mounted engine includes JSON endpoints for automation/agents:
237
+
238
+ - `GET /solid_events/api/incidents?status=active&limit=50`
239
+ - `GET /solid_events/api/incidents?status=active&limit=50&cursor=123`
240
+ - `GET /solid_events/api/incidents/:id/traces`
241
+ - `GET /solid_events/api/incidents/:id/context`
242
+ - `GET /solid_events/api/incidents/:id/events`
243
+ - `GET /solid_events/api/incidents/:id/evidences`
244
+ - `PATCH /solid_events/api/incidents/:id/acknowledge|resolve|reopen`
245
+ - `PATCH /solid_events/api/incidents/:id/assign` (`owner`, `team`, `assigned_by`, `assignment_note`)
246
+ - `PATCH /solid_events/api/incidents/:id/mute` (`minutes`)
247
+
248
+ Resolution metadata is supported via `PATCH /solid_events/api/incidents/:id/resolve`
249
+ with `resolved_by` and `resolution_note`.
250
+
251
+ Incident policies include `new_fingerprint`, `error_spike`, `p95_regression`,
252
+ `slo_burn_rate`, and `multi_signal_degradation`.
253
+
254
+ - `GET /solid_events/api/traces/:id`
255
+ - `GET /solid_events/api/traces?error_fingerprint=...`
256
+ - `GET /solid_events/api/traces?entity_type=Order&entity_id=123`
257
+ - `GET /solid_events/api/traces?limit=50&cursor=456`
258
+ - `GET /solid_events/api/traces?feature_key=feature_flag&feature_value=checkout_v2`
259
+ - `GET /solid_events/api/metrics/error_rates?dimension=source&window=24h`
260
+ - `GET /solid_events/api/metrics/error_rates?dimension=source&feature_key=feature_flag&feature_value=checkout_v2`
261
+ - `GET /solid_events/api/metrics/latency?dimension=deployment_id&window=7d`
262
+ - `GET /solid_events/api/metrics/compare?metric=error_rate&dimension=source&window=24h`
263
+ - `GET /solid_events/api/metrics/cohorts?cohort_key=plan&metric=error_rate&window=24h`
264
+ - `GET /solid_events/api/journeys?request_id=req-123&window=24h`
265
+ - `GET /solid_events/api/journeys?entity_type=Order&entity_id=123&window=24h`
266
+ - `GET /solid_events/api/journeys?request_id=req-123&window=24h&errors_only=true`
267
+ - `GET /solid_events/api/export/traces?format=json&status=error&window=24h`
268
+ - `GET /solid_events/api/export/incidents?format=json&status=active`
269
+
270
+ Exports currently support `format=json` only.
271
+
272
+ Set `config.api_token` (or `SOLID_EVENTS_API_TOKEN`) to require `X-Solid-Events-Token` or `Authorization: Bearer <token>`.
273
+ List endpoints return `{ data: [...], next_cursor: <id|null> }` for cursor pagination.
274
+ Set `config.evaluate_incidents_on_request = false` in production if you only want job-driven evaluation.
275
+
276
+ `context` includes `solid_errors` enrichment when available.
277
+
278
+ API contract and versioning details: `docs/api.md`.
279
+ Migration guide: `docs/migration.md`.
280
+ Performance baseline and query plan notes: `docs/performance.md`.
281
+ Incident policy tuning by environment: `docs/incident_policies.md`.
282
+
283
+ ### Incident Policies by Environment
284
+
285
+ Recommended defaults:
286
+
287
+ - Development: high thresholds to avoid noisy local incidents.
288
+ - Staging: medium thresholds to catch regressions before deploy.
289
+ - Production: low thresholds to detect customer-facing degradation quickly.
290
+
291
+ Use this reference config:
292
+
293
+ ```ruby
294
+ case Rails.env
295
+ when "development"
296
+ config.incident_slo_target_error_rate_pct = 5.0
297
+ config.incident_slo_burn_rate_threshold = 4.0
298
+ config.incident_multi_signal_error_rate_pct = 25.0
299
+ config.incident_multi_signal_p95_factor = 2.0
300
+ config.incident_multi_signal_sql_duration_ms = 500.0
301
+ when "staging"
302
+ config.incident_slo_target_error_rate_pct = 2.0
303
+ config.incident_slo_burn_rate_threshold = 3.0
304
+ config.incident_multi_signal_error_rate_pct = 15.0
305
+ config.incident_multi_signal_p95_factor = 1.6
306
+ config.incident_multi_signal_sql_duration_ms = 300.0
307
+ else
308
+ config.incident_slo_target_error_rate_pct = 1.0
309
+ config.incident_slo_burn_rate_threshold = 2.0
310
+ config.incident_multi_signal_error_rate_pct = 10.0
311
+ config.incident_multi_signal_p95_factor = 1.4
312
+ config.incident_multi_signal_sql_duration_ms = 200.0
313
+ end
314
+ ```
315
+
316
+ ### Benchmarking
317
+
318
+ Run a lightweight query benchmark:
319
+
320
+ ```bash
321
+ bundle exec rake "solid_events:benchmark[200]"
322
+ bundle exec rake "solid_events:benchmark_check[200,150,250]"
323
+ ```
324
+
325
+ Inspect incident lifecycle history in the UI:
326
+
327
+ - `/solid_events/incidents/:id/events`
328
+
329
+ ### Scheduling (Production)
330
+
331
+ To avoid relying on dashboard traffic, schedule these:
332
+
333
+ - `SolidEvents::EvaluateIncidentsJob.perform_later` every 5 minutes
334
+ - `SolidEvents::PruneJob.perform_later` daily
335
+
336
+ Rake alternatives (cron-friendly):
337
+
338
+ - `bin/rails solid_events:evaluate_incidents`
339
+ - `bin/rails solid_events:prune`
340
+
341
+ Example cron entries:
342
+
343
+ ```cron
344
+ */5 * * * * cd /app && bin/rails solid_events:evaluate_incidents RAILS_ENV=production
345
+ 15 2 * * * cd /app && bin/rails solid_events:prune RAILS_ENV=production
346
+ ```
347
+
348
+ Example `config/recurring.yml` (Solid Queue):
349
+
350
+ ```yaml
351
+ production:
352
+ evaluate_solid_events_incidents:
353
+ class: "SolidEvents::EvaluateIncidentsJob"
354
+ schedule: "every 5 minutes"
355
+ prune_solid_events:
356
+ class: "SolidEvents::PruneJob"
357
+ schedule: "every day at 2:15am"
358
+ ```
359
+
360
+ ### Incident Response Runbook
361
+
362
+ Minimal flow your team/agents can automate:
363
+
364
+ 1. `GET /solid_events/api/incidents?status=active`
365
+ 2. For each incident: `GET /solid_events/api/incidents/:id/traces`
366
+ 3. Execute your fix workflow (outside `SolidEvents`) using the canonical trace context.
367
+ 4. Mark state with:
368
+ - `PATCH /solid_events/api/incidents/:id/acknowledge`
369
+ - `PATCH /solid_events/api/incidents/:id/resolve`
370
+
371
+ This gives a full closed-loop process without depending on raw Rails logs.
372
+
373
+ ---
374
+
375
+ ## The Dashboard (Mission Control)
376
+
377
+ Mount the dashboard in your `config/routes.rb` to view your Context Graph.
378
+
379
+ ```ruby
380
+ authenticate :user, ->(u) { u.admin? } do
381
+ mount SolidEvents::Engine, at: "/solid_events"
382
+ end
383
+ ```
384
+
385
+ **Features:**
386
+
387
+ - **Live Tail:** See requests as they come in, in real time.
388
+ - **Trace Waterfall:** Visualize the sequence: `Controller` -> `Model` -> `SQL` -> `Job`.
389
+ - **Entity Search:** Search for "Order 123" to see every trace that ever touched that order.
390
+ - **Dimension Filters:** Filter by entity type/id, context key/value, status, source, and minimum duration.
391
+ - **Fingerprint Filter:** Filter directly by canonical `error_fingerprint` from the traces index.
392
+ - **Request Correlation:** Filter and pivot by canonical `request_id` to stitch related traces instantly.
393
+ - **Correlation Pivots:** On each trace page, see related entity/error clusters and a simple duration regression signal.
394
+ - **Related Trace Exploration:** Jump from one trace to all traces sharing the same entity or error fingerprint.
395
+ - **Regression Surfacing:** Index highlights latency regressions and newly-seen error fingerprints.
396
+ - **Hot Paths & Percentiles:** Automatic p50/p95/p99 and error-rate visibility for top paths/jobs.
397
+ - **SLO Panels:** Throughput + error rate + p95/p99 at a glance for the active filter window.
398
+ - **Hot Path Drilldown:** Hourly p95 and recent failing traces for a selected route/job.
399
+ - **Incidents Feed:** Built-in detection for new fingerprints, error spikes, and p95 regressions.
400
+ - **Incident Lifecycle:** Acknowledge, resolve, and reopen incidents from the dashboard feed.
401
+ - **Incident Noise Control:** Suppression rules, dedupe windows, and notifier hooks for alert pipelines.
402
+ - **Deploy-Aware Error Detection:** Highlights fingerprints unique to current deploy/version.
403
+
404
+ ---
405
+
406
+ ## The Future: SolidCopilot
407
+
408
+ `SolidEvents` is the data foundation for **SolidCopilot**, an AI agent that uses this data to:
409
+
410
+ 1. **Auto-Fix Bugs:** By reading the Trace History leading up to an error.
411
+ 2. **Generate Tests:** By converting real Production Traces into Minitest files.
412
+ 3. **Explain Architecture:** By visualizing the actual flow of data through your app.
413
+
414
+ _(Coming Soon)_
415
+
416
+ ---
417
+
24
418
  ## Contributing
25
- Contribution directions go here.
26
419
 
27
- ## License
28
- The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
420
+ This project is open source (MIT). We welcome contributions that align with the **"Solid"** philosophy: simple, SQL-backed, and Rails-native.
421
+
422
+ ---
423
+
424
+ **License:** MIT
data/Rakefile CHANGED
@@ -1,8 +1,12 @@
1
- require "bundler/setup"
1
+ # frozen_string_literal: true
2
2
 
3
- APP_RAKEFILE = File.expand_path("test/dummy/Rakefile", __dir__)
4
- load "rails/tasks/engine.rake"
3
+ require "bundler/gem_tasks"
4
+ require "rake/testtask"
5
5
 
6
- load "rails/tasks/statistics.rake"
6
+ Rake::TestTask.new(:test) do |t|
7
+ t.libs << "test"
8
+ t.pattern = "test/**/*_test.rb"
9
+ t.verbose = true
10
+ end
7
11
 
8
- require "bundler/gem_tasks"
12
+ task default: :test