RubyGems - chrono_forge-dashboard - Versions diffs - 0.1.0 - Mend

chrono_forge-dashboard 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

data/app/controllers/chrono_forge/dashboard/workflows_controller.rb ADDED Viewed

@@ -0,0 +1,31 @@
+module ChronoForge
+  module Dashboard
+    class WorkflowsController < BaseController
+      def index
+        @query = WorkflowsQuery.new(**list_params)
+        @workflows = @query.records
+        @waits = WaitStatePresenter.active_map(@workflows)
+        stats = StatsQuery.new
+        @stats = stats.counts
+        @stats_cap = stats.cap
+      end
+      def show
+        @workflow = ChronoForge::Workflow.find(params[:id])
+        @timeline = TimelinePresenter.new(@workflow)
+        @context = ContextPresenter.new(@workflow)
+        @wait = WaitStatePresenter.new(@workflow).active
+        @periodic = PeriodicHealthPresenter.new(@workflow).tasks
+        @branches = BranchesPresenter.new(@workflow)
+        @parent_log = @workflow.parent_execution_log
+      end
+      private
+      def list_params
+        params.permit(:state, :job_class, :key, :created_from, :created_to, :before, :after)
+          .to_h.symbolize_keys.merge(per: ChronoForge::Dashboard.config.page_size)
+      end
+    end
+  end
+end

data/app/helpers/chrono_forge/dashboard/dashboard_helper.rb ADDED Viewed

@@ -0,0 +1,153 @@
+module ChronoForge
+  module Dashboard
+    module DashboardHelper
+      # Display order for state counts: active work first, terminal last. Any
+      # unknown states are appended so a new core state never silently vanishes.
+      STATE_ORDER = %w[running idle stalled failed completed].freeze
+      def cf_state_order(keys)
+        (STATE_ORDER & keys) + (keys - STATE_ORDER)
+      end
+      def cf_badge(state)
+        tag.span(state, class: "cf-pill cf-pill-#{state}")
+      end
+      # Shared "chip" treatment for inline nav/action links (metrics, details,
+      # repetitions, open, pagination, back) — a subtle bordered button, never an
+      # underlined text link. Pass extra utility classes (margins, truncation).
+      def cf_chip(extra = nil)
+        ["inline-flex items-center rounded-md border border-zinc-200 px-2 py-0.5 text-xs text-zinc-600 hover:bg-zinc-50", extra].compact.join(" ")
+      end
+      # State badge, upgraded to "scheduled" for an idle workflow parked on a
+      # wait whose wake time is still in the future — so genuinely-scheduled work
+      # doesn't read as "stuck idle".
+      def cf_state_badge(workflow, wait = nil)
+        return cf_badge("scheduled") if workflow.idle? && wait&.scheduled?
+        cf_badge(workflow.state)
+      end
+      def cf_dot(state)
+        tag.span(class: "cf-dot cf-dot-#{state}")
+      end
+      def cf_time(t)
+        t&.iso8601 || "—"
+      end
+      # A capped count: shows "5000+" once the count saturates its cap.
+      def cf_capped(count, cap)
+        (count >= cap) ? "#{cap}+" : count.to_s
+      end
+      # Whether the viewer prefers absolute timestamps (cookie-persisted nav toggle).
+      def cf_absolute_time?
+        cookies[:cf_time_format] == "absolute"
+      end
+      # Auto-refresh interval in seconds (0 = off). A cookie-persisted nav control
+      # overrides the configured default per viewer; options come from config.
+      def cf_poll_options = ChronoForge::Dashboard.config.polling_interval_options
+      def cf_poll_interval
+        raw = cookies[:cf_poll_interval]
+        return raw.to_i if raw.present? && raw.match?(/\A\d+\z/)
+        ChronoForge::Dashboard.config.polling_interval.to_i
+      end
+      def cf_poll_label(secs)
+        return "off" if secs.zero?
+        (secs % 60 == 0) ? "#{secs / 60}m" : "#{secs}s"
+      end
+      # A timestamp shown relative ("3 minutes ago") or absolute (raw ISO8601)
+      # per the viewer's preference, with the other form available on hover.
+      def cf_ago(t)
+        return "—" unless t
+        rel = "#{time_ago_in_words(t)} ago"
+        abs = t.iso8601
+        shown, hover = cf_absolute_time? ? [abs, rel] : [rel, abs]
+        tag.span(shown, title: hover, class: "cursor-help")
+      end
+      # Human duration between two times (e.g. "1m 04s"); "—" if unfinished.
+      def cf_duration(from, to)
+        return "—" unless from && to
+        cf_secs((to - from).to_i)
+      end
+      # Human duration from a number of seconds, scaled to the two most-significant
+      # units (e.g. "45s", "1m 04s", "3h 12m", "2d 21h"); "—" if nil.
+      def cf_secs(secs)
+        return "—" if secs.nil?
+        secs = secs.to_i
+        return "#{secs}s" if secs < 60
+        return "#{secs / 60}m #{(secs % 60).to_s.rjust(2, "0")}s" if secs < 3600
+        return "#{secs / 3600}h #{(secs % 3600 / 60).to_s.rjust(2, "0")}m" if secs < 86400
+        "#{secs / 86400}d #{(secs % 86400 / 3600).to_s.rjust(2, "0")}h"
+      end
+      # Class name for a stacked-bar segment, width quantized to 5% steps so it
+      # stays CSP-safe (no inline style — see .cf-bar-{0..100} in tailwind.css).
+      def cf_bar_width(value, max)
+        pct = (max.to_f.zero? ? 0 : (value / max.to_f * 100))
+        "cf-bar-#{(pct / 5).round * 5}"
+      end
+      # A rate (0.0–1.0) as a percentage; "—" if nil. Keeps tiny non-zero rates
+      # visible (a 0.0008% workflow-failure rate shows "<0.01%", never "0%").
+      def cf_pct(rate)
+        return "—" if rate.nil?
+        pct = rate * 100
+        return "0%" if pct.zero?
+        return "<0.01%" if pct < 0.01
+        (pct < 1) ? "#{pct.round(2)}%" : "#{pct.round}%"
+      end
+      # Concise latency summary (avg + most recent) from a list of run seconds.
+      def cf_latency_summary(latencies)
+        return "—" if latencies.blank?
+        avg = (latencies.sum.to_f / latencies.size).round
+        "avg #{avg}s · last #{latencies.last}s"
+      end
+      # Short, readable label for a parsed step kind.
+      KIND_LABELS = {
+        execute: "execute", sleep: "wait", wait: "wait until", continue: "continue if",
+        repeat_coordination: "repeat", repeat_run: "run", lifecycle: "workflow",
+        branch: "branch", merge: "merge", unknown: "step"
+      }.freeze
+      def cf_kind_label(kind)
+        KIND_LABELS.fetch(kind, kind.to_s)
+      end
+      # Human-friendly [label, value] pairs of a step's metadata for the timeline
+      # — surfaces things like a wait's resume time, a wait_until timeout, or a
+      # durably_repeat's last execution. Keys are humanized; values are stringified
+      # (the view truncates). Blank values are dropped.
+      # Internal bookkeeping surfaced elsewhere (the linked error is rendered
+      # inline; branch poll state + spawn cursors show in the Branches panel), so
+      # they'd just be noise in the timeline's metadata line. poll_token is the
+      # merge poller's fencing token — pure plumbing, never user-facing.
+      META_SKIP = %w[error_log_id poll poll_token cursors].freeze
+      def cf_meta_pairs(metadata)
+        return [] unless metadata.is_a?(Hash)
+        metadata
+          .reject { |k, v| v.nil? || v == "" || META_SKIP.include?(k.to_s) }
+          .map { |k, v| [k.to_s.tr("_", " "), v.to_s] }
+      end
+      # Text color for an execution-log status (pending/completed/failed).
+      def cf_status_color(status)
+        case status
+        when "completed" then "text-emerald-600"
+        when "failed" then "text-rose-600"
+        else "text-zinc-500"
+        end
+      end
+    end
+  end
+end

data/app/presenters/chrono_forge/dashboard/branch_presenter.rb ADDED Viewed

@@ -0,0 +1,64 @@
+module ChronoForge
+  module Dashboard
+    # Health of a single branch (a branch$<name> execution log) for the parent's
+    # detail page. Every child count is CAPPED and index-only on
+    # (parent_execution_log_id, state) — a branch can hold hundreds of thousands
+    # of children, so we never count the full set, only up to CAP (shown "CAP+").
+    class BranchPresenter
+      CAP = 5000
+      # merge_state: :merged | :merging | nil (not yet merged)
+      def initialize(log, merge_state = nil)
+        @log = log
+        @merge_state = merge_state
+      end
+      attr_reader :log, :merge_state
+      def name = StepNameParser.parse(@log.step_name).name
+      # The branch is "sealed" once its block closed (done dispatching children).
+      def sealed? = @log.completed?
+      def dispatched = capped(children)
+      def pending = capped(children.where.not(state: ChronoForge::Workflow.states[:completed]))
+      def blocked = capped(children.where(state: BLOCKED_STATES))
+      def cap = CAP
+      BLOCKED_STATES = %i[failed stalled].map { |s| ChronoForge::Workflow.states[s] }.freeze
+      # A scheduled next poll this far past due means the BranchMergeJob poller
+      # likely never ran (queue latency aside) — a heuristic, hence "potential".
+      POLL_OVERDUE_GRACE = 120 # seconds
+      # The BranchMergeJob stamps its poll state onto the branch log's metadata
+      # (it can't be queried from the backend; ActiveJob has no such API).
+      def polled? = poll.present?
+      def last_polled_at = parse_time(poll&.dig("last_polled_at"))
+      def next_poll_at = parse_time(poll&.dig("next_poll_at"))
+      def polls = poll&.dig("polls").to_i
+      # next_poll_at is nil once the merge completes, so a finished merge never
+      # looks overdue; a non-nil time well in the past = the poller is likely dead.
+      def poll_overdue?
+        t = next_poll_at
+        t.present? && t < Time.current - POLL_OVERDUE_GRACE
+      end
+      private
+      def children = @log.spawned_workflows
+      def poll = @log.metadata&.dig("poll")
+      def parse_time(value) = value.present? ? Time.zone.parse(value.to_s) : nil
+      # Index-only COUNT over a LIMIT CAP subquery — O(CAP) regardless of how many
+      # children match (mirrors StatsQuery).
+      def capped(relation)
+        ChronoForge::Workflow.from(relation.reorder(nil).select(:id).limit(CAP), :capped).count
+      end
+    end
+  end
+end

data/app/presenters/chrono_forge/dashboard/branches_presenter.rb ADDED Viewed

@@ -0,0 +1,62 @@
+module ChronoForge
+  module Dashboard
+    # A workflow's branches for its detail page. Loads only the coordination logs
+    # (branch$<name> and merge$<names>) — a tiny set — and derives each branch's
+    # merge state from the merge logs (the core doesn't persist a "merged" flag).
+    class BranchesPresenter
+      # One merge join (a merge$<names> log = a BranchMergeJob's durable target).
+      # state: :merging (pending — a poller is joining) | :merged (completed).
+      Merge = Struct.new(:names, :state, :started_at) do
+        def merging? = state == :merging
+      end
+      def initialize(workflow) = @workflow = workflow
+      def any? = branch_logs.any?
+      def branches
+        @branches ||= branch_logs
+          .sort_by(&:step_name)
+          .map { |log| BranchPresenter.new(log, merge_states[StepNameParser.parse(log.step_name).name]) }
+      end
+      # The merge joins on this workflow, in-progress first. A long-pending merge
+      # with no blocked children is the sign of a dropped BranchMergeJob poller.
+      def merges
+        @merges ||= merge_logs
+          .map { |log| Merge.new(StepNameParser.parse(log.step_name).name.split(","), log.completed? ? :merged : :merging, log.started_at) }
+          .sort_by { |m| [m.merging? ? 0 : 1, m.started_at || Time.current] }
+      end
+      private
+      def coordination_logs
+        d = StepNameParser::DELIM
+        @coordination_logs ||= @workflow.execution_logs
+          .where("step_name LIKE ? OR step_name LIKE ?", "branch#{d}%", "merge#{d}%")
+          .to_a
+      end
+      def branch_logs
+        coordination_logs.select { |l| StepNameParser.parse(l.step_name).kind == :branch }
+      end
+      def merge_logs
+        coordination_logs.select { |l| StepNameParser.parse(l.step_name).kind == :merge }
+      end
+      # branch name => :merged (merge log completed) | :merging (pending). A merge
+      # log covers one or more comma-joined branch names; "merged" wins if a name
+      # appears in both a completed and a pending merge.
+      def merge_states
+        @merge_states ||= merge_logs
+          .each_with_object({}) do |log, map|
+            state = log.completed? ? :merged : :merging
+            StepNameParser.parse(log.step_name).name.split(",").each do |nm|
+              map[nm] = state unless map[nm] == :merged
+            end
+          end
+      end
+    end
+  end
+end

data/app/presenters/chrono_forge/dashboard/context_presenter.rb ADDED Viewed

@@ -0,0 +1,17 @@
+module ChronoForge
+  module Dashboard
+    class ContextPresenter
+      def initialize(workflow) = @workflow = workflow
+      def nodes
+        context.map { |k, v| {key: k, value: v, type: v.class.name, bytes: v.to_json.bytesize} }
+      end
+      def byte_size = context.to_json.bytesize
+      private
+      def context = @workflow.context || {}
+    end
+  end
+end

data/app/presenters/chrono_forge/dashboard/periodic_health_presenter.rb ADDED Viewed

@@ -0,0 +1,77 @@
+module ChronoForge
+  module Dashboard
+    # Health of a workflow's durably_repeat tasks. Never materializes the full run
+    # history (which can be huge) — coordination logs are a tiny set, and each
+    # task's run aggregates are computed with bounded/scoped queries that ride the
+    # [workflow_id, step_name] index as range scans.
+    class PeriodicHealthPresenter
+      Task = Struct.new(:name, :last_execution_at, :next_scheduled_at, :timed_out_count, :latencies)
+      RECENT = 20
+      # Bound the metadata scan used to count missed ticks (see #missed_ticks).
+      SCAN_CAP = 1_000
+      def initialize(workflow) = @workflow = workflow
+      def tasks
+        coordinations.map do |coord|
+          name = StepNameParser.parse(coord.step_name).name
+          Task.new(
+            name: name,
+            last_execution_at: parse_time(coord.metadata&.dig("last_execution_at")),
+            next_scheduled_at: next_scheduled(name),
+            timed_out_count: missed_ticks(name),
+            latencies: recent_latencies(name)
+          )
+        end
+      end
+      private
+      # The coordination logs (durably_repeat$name, no $ts suffix) — one per task.
+      def coordinations
+        @workflow.execution_logs
+          .where("step_name LIKE ?", "durably_repeat#{d}%")
+          .where.not("step_name LIKE ?", "durably_repeat#{d}%#{d}%")
+          .to_a
+      end
+      def runs(name)
+        @workflow.execution_logs.where("step_name LIKE ?", "durably_repeat#{d}#{name}#{d}%")
+      end
+      # Missed (timed-out) ticks. A fast-forward catch-up collapses N expired ticks
+      # into one TimeoutError row tagged fast_forwarded:N, so count it as N; a plain
+      # per-tick timeout counts as 1. Bounded metadata scan.
+      def missed_ticks(name)
+        runs(name).where(error_class: "TimeoutError")
+          .limit(SCAN_CAP).pluck(:metadata)
+          .sum { |m| [m&.dig("fast_forwarded").to_i, 1].max }
+      end
+      # Next run = the furthest-out not-yet-completed scheduled repetition. Pending
+      # runs are few (the future-scheduled ones), so loading them is bounded.
+      def next_scheduled(name)
+        ts = runs(name).where(state: ChronoForge::ExecutionLog.states[:pending])
+          .filter_map { |r| StepNameParser.parse(r.step_name).timestamp }.max
+        Time.zone.at(ts) if ts
+      end
+      # Durations (seconds) of the most recent completed runs, oldest-first so the
+      # summary's "last" is the newest. Bounded to RECENT rows.
+      def recent_latencies(name)
+        runs(name).where(state: ChronoForge::ExecutionLog.states[:completed])
+          .where.not(started_at: nil, completed_at: nil)
+          .order(id: :desc).limit(RECENT)
+          .map { |r| (r.completed_at - r.started_at).to_i }.reverse
+      end
+      def parse_time(value)
+        return nil if value.blank?
+        value.is_a?(Time) ? value : Time.zone.parse(value.to_s)
+      end
+      def d = StepNameParser::DELIM
+    end
+  end
+end

data/app/presenters/chrono_forge/dashboard/timeline_presenter.rb ADDED Viewed

@@ -0,0 +1,90 @@
+module ChronoForge
+  module Dashboard
+    # Builds the step timeline of a workflow's detail page: one Entry per
+    # execution log (per-iteration repeat runs excluded), each with the error
+    # logs that belong to it, plus the error logs that belong to no entry.
+    class TimelinePresenter
+      Entry = Struct.new(:id, :kind, :name, :step_name, :status, :attempts,
+        :started_at, :completed_at, :last_executed_at, :error_class, :error_message,
+        :metadata, :errors, :missing_error_id, :iterations, :tombstones, :skipped_ticks, :last_run_at)
+      # Per-iteration run logs of a durably_repeat step are excluded from the
+      # timeline (they get their own paginated page) and summarized instead.
+      RUN_PATTERN = "durably_repeat#{StepNameParser::DELIM}%#{StepNameParser::DELIM}%".freeze
+      # Regexp counterpart of RUN_PATTERN, used on error logs already in memory.
+      RUN_PATTERN_RX = /\Adurably_repeat#{Regexp.escape(StepNameParser::DELIM)}.+#{Regexp.escape(StepNameParser::DELIM)}/
+      # Entry fields filled in from a RepetitionsQuery summary.
+      SUMMARY_FIELDS = %i[iterations tombstones skipped_ticks last_run_at].freeze
+      attr_reader :workflow
+      def initialize(workflow)
+        @workflow = workflow
+      end
+      # The timeline entries, built once and memoized.
+      def entries
+        @entries ||= build
+      end
+      # Error logs not shown on any step — workflow-level failures whose step_name
+      # is nil and that aren't linked to a $workflow_failure$ marker. Surfaced so
+      # a failure is never invisible. (Repeat-run errors live on the repetitions
+      # page, so they're excluded.)
+      def orphan_errors
+        entries # building the entries is what fills @orphan_errors
+        @orphan_errors
+      end
+      # The log the workflow currently "sits" on: the newest failed log, else
+      # the newest pending wait log, else the last log of the timeline.
+      def current_position
+        newest_first = ordered_logs.reverse
+        newest_first.find(&:failed?) ||
+          newest_first.find { |log| log.pending? && StepNameParser.parse(log.step_name).kind == :wait } ||
+          ordered_logs.last
+      end
+      private
+      # Execution logs minus the per-iteration repeat runs, ordered by
+      # started_at and then id. Loaded once.
+      def ordered_logs
+        @ordered_logs ||= @workflow.execution_logs
+          .where.not("step_name LIKE ?", RUN_PATTERN)
+          .order(Arel.sql("started_at, id")).to_a
+      end
+      # Builds the entries and, as a by-product, @orphan_errors.
+      def build
+        error_logs = @workflow.error_logs.order(:attempt, :created_at).to_a
+        errors_by_step = error_logs.group_by(&:step_name)
+        errors_by_id = error_logs.index_by(&:id)
+        displayed = []
+        rows = ordered_logs.map do |log|
+          parsed = StepNameParser.parse(log.step_name)
+          step_errors = errors_by_step.fetch(log.step_name, []).dup
+          # A workflow-level failure ($workflow_failure$<id>) records its error
+          # with a nil step_name, so it is attached to the marker by id. When
+          # that error log is gone (independently pruned) the id is noted, so
+          # the marker still says *something* instead of rendering a failure
+          # without any error.
+          missing_id = nil
+          if parsed.kind == :lifecycle && parsed.name == "failure" && parsed.timestamp
+            linked = errors_by_id[parsed.timestamp]
+            if linked
+              step_errors << linked unless step_errors.include?(linked)
+            else
+              missing_id = parsed.timestamp
+            end
+          end
+          displayed.concat(step_errors)
+          entry = Entry.new(id: log.id, kind: parsed.kind, name: parsed.name, step_name: log.step_name,
+            status: log.state, attempts: log.attempts, started_at: log.started_at,
+            completed_at: log.completed_at, last_executed_at: log.last_executed_at,
+            error_class: log.error_class, error_message: log.error_message,
+            metadata: log.metadata, errors: step_errors, missing_error_id: missing_id)
+          summarize_repetitions(entry, parsed.name) if parsed.kind == :repeat_coordination
+          entry
+        end
+        @orphan_errors = (error_logs - displayed).reject { |err| err.step_name.to_s.match?(RUN_PATTERN_RX) }
+        rows
+      end
+      # Copies the repetition summary of step +name+ onto +entry+.
+      def summarize_repetitions(entry, name)
+        summary = RepetitionsQuery.new(workflow: @workflow, step: name).summary
+        SUMMARY_FIELDS.each { |field| entry[field] = summary[field] }
+      end
+    end
+  end
+end

data/app/presenters/chrono_forge/dashboard/wait_state_presenter.rb ADDED Viewed

@@ -0,0 +1,64 @@
+module ChronoForge
+  module Dashboard
+    class WaitStatePresenter
+      # kind: :wait (wait_until — polls, has a timeout) or :continue (continue_if
+      # — waits on an external event, NO timeout, never self-resumes). A stuck
+      # continue_if is the silent killer: a webhook that never arrives leaves the
+      # workflow parked forever with nothing to flag it.
+      Active = Struct.new(:kind, :condition, :waiting_since, :timeout_at) do
+        # Only a time-based wait with a wake time still in the future is
+        # "scheduled" (intentionally parked until then). Event waits never are.
+        def scheduled?
+          return false unless kind == :wait && timeout_at
+          t = timeout_at.is_a?(Time) ? timeout_at : Time.zone.parse(timeout_at.to_s)
+          t&.future? || false
+        end
+        def event_wait? = kind == :continue
+        # The scheduled wake time as a Time, or nil for event waits / no timeout.
+        def next_run_at
+          return nil unless kind == :wait && timeout_at
+          timeout_at.is_a?(Time) ? timeout_at : Time.zone.parse(timeout_at.to_s)
+        end
+      end
+      def initialize(workflow) = @workflow = workflow
+      # Reuses the batch resolver so a single workflow and a page of them agree:
+      # it looks at the latest *pending wait/continue* log, ignoring durably_repeat
+      # run logs (which stamp started_at = now and would otherwise mask the wait).
+      def active
+        self.class.active_map([@workflow])[@workflow.id]
+      end
+      # Active waits for a batch of workflows, in two queries instead of one per
+      # row. Returns {workflow_id => Active} for idle workflows currently parked
+      # on a pending wait_until or continue_if. Bounded by the caller's set.
+      def self.active_map(workflows)
+        ids = workflows.select(&:idle?).map(&:id)
+        return {} if ids.empty?
+        d = StepNameParser::DELIM
+        latest = {}
+        ChronoForge::ExecutionLog
+          .where(workflow_id: ids, state: ChronoForge::ExecutionLog.states[:pending])
+          .where("step_name LIKE ? OR step_name LIKE ?", "wait_until#{d}%", "continue_if#{d}%")
+          .order(Arel.sql("started_at, id"))
+          .each { |log| latest[log.workflow_id] = log }
+        latest.transform_values { |log| build(log) }
+      end
+      def self.build(log)
+        p = StepNameParser.parse(log.step_name)
+        Active.new(
+          kind: p.kind,
+          condition: p.name,
+          waiting_since: log.last_executed_at || log.started_at,
+          timeout_at: log.metadata&.dig("timeout_at")
+        )
+      end
+    end
+  end
+end