leva 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/README.md +52 -16
  3. data/app/controllers/leva/dataset_records_controller.rb +21 -0
  4. data/app/controllers/leva/datasets_controller.rb +9 -2
  5. data/app/controllers/leva/experiments_controller.rb +34 -9
  6. data/app/controllers/leva/runner_results_controller.rb +8 -0
  7. data/app/controllers/leva/workbench_controller.rb +85 -12
  8. data/app/helpers/leva/application_helper.rb +39 -0
  9. data/app/javascript/controllers/prompt_form_controller.js +45 -0
  10. data/app/javascript/controllers/prompt_selector_controller.js +31 -0
  11. data/app/jobs/leva/experiment_job.rb +9 -4
  12. data/app/jobs/leva/run_eval_job.rb +40 -0
  13. data/app/models/concerns/leva/recordable.rb +37 -0
  14. data/app/models/leva/dataset.rb +15 -6
  15. data/app/models/leva/dataset_record.rb +43 -5
  16. data/app/models/leva/evaluation_result.rb +22 -14
  17. data/app/models/leva/experiment.rb +26 -14
  18. data/app/models/leva/prompt.rb +14 -1
  19. data/app/models/leva/runner_result.rb +54 -0
  20. data/app/views/layouts/leva/application.html.erb +24 -13
  21. data/app/views/leva/dataset_records/index.html.erb +49 -0
  22. data/app/views/leva/dataset_records/show.html.erb +30 -0
  23. data/app/views/leva/datasets/_dataset.html.erb +18 -0
  24. data/app/views/leva/datasets/_form.html.erb +24 -0
  25. data/app/views/leva/datasets/edit.html.erb +5 -0
  26. data/app/views/leva/datasets/index.html.erb +51 -38
  27. data/app/views/leva/datasets/new.html.erb +5 -0
  28. data/app/views/leva/datasets/show.html.erb +160 -8
  29. data/app/views/leva/experiments/_experiment.html.erb +42 -0
  30. data/app/views/leva/experiments/_form.html.erb +49 -0
  31. data/app/views/leva/experiments/edit.html.erb +5 -0
  32. data/app/views/leva/experiments/index.html.erb +53 -37
  33. data/app/views/leva/experiments/new.html.erb +5 -0
  34. data/app/views/leva/experiments/show.html.erb +115 -19
  35. data/app/views/leva/runner_results/show.html.erb +64 -0
  36. data/app/views/leva/workbench/_evaluation_area.html.erb +5 -0
  37. data/app/views/leva/workbench/_prompt_content.html.erb +216 -0
  38. data/app/views/leva/workbench/_prompt_form.html.erb +89 -0
  39. data/app/views/leva/workbench/_prompt_sidebar.html.erb +21 -0
  40. data/app/views/leva/workbench/_results_section.html.erb +159 -0
  41. data/app/views/leva/workbench/_top_bar.html.erb +10 -0
  42. data/app/views/leva/workbench/edit.html.erb +20 -0
  43. data/app/views/leva/workbench/index.html.erb +5 -91
  44. data/app/views/leva/workbench/new.html.erb +79 -36
  45. data/config/routes.rb +15 -6
  46. data/db/migrate/20240813172916_create_leva_datasets.rb +1 -0
  47. data/db/migrate/20240813173033_create_leva_dataset_records.rb +1 -1
  48. data/db/migrate/20240813173035_create_leva_experiments.rb +3 -2
  49. data/db/migrate/20240813173050_create_leva_evaluation_results.rb +2 -2
  50. data/db/migrate/20240816201419_create_leva_runner_results.rb +11 -0
  51. data/db/migrate/20240816201433_update_leva_evaluation_results.rb +8 -0
  52. data/db/migrate/20240821163608_make_experiment_optional_for_runner_results.rb +6 -0
  53. data/db/migrate/20240821181934_add_prompt_to_leva_runner_results.rb +5 -0
  54. data/db/migrate/20240821183153_add_runner_and_evaluator_to_leva_experiments.rb +6 -0
  55. data/db/migrate/20240821191713_add_actual_result_to_leva_dataset_records.rb +5 -0
  56. data/db/migrate/20240822143201_remove_actual_result_from_leva_runner_results.rb +5 -0
  57. data/lib/generators/leva/templates/eval.rb.erb +6 -7
  58. data/lib/leva/version.rb +1 -1
  59. data/lib/leva.rb +62 -45
  60. metadata +48 -5
  61. data/app/evals/test_sentiment_accuracy_eval.rb +0 -6
  62. data/app/runners/test_sentiment_run.rb +0 -13
  63. data/lib/leva/base_eval.rb +0 -75
@@ -1,38 +1,81 @@
1
- <% content_for :title, 'New Experiment' %>
2
- <div class="px-4 sm:px-6 lg:px-8">
3
- <div class="sm:flex sm:items-center">
4
- <div class="sm:flex-auto">
5
- <h1 class="text-2xl font-semibold text-gray-900">New Experiment</h1>
6
- <p class="mt-2 text-sm text-gray-700">Create a new experiment in your workbench.</p>
7
- </div>
8
- </div>
9
- <div class="mt-8 max-w-xl">
10
- <%= form_with(model: @experiment, url: experiments_path, local: true, class: "space-y-8 divide-y divide-gray-200") do |form| %>
11
- <div class="space-y-8 divide-y divide-gray-200">
12
- <div>
13
- <div>
14
- <h3 class="text-lg leading-6 font-medium text-gray-900">Experiment Information</h3>
15
- <p class="mt-1 text-sm text-gray-500">Provide details for your new experiment.</p>
16
- </div>
17
- <div class="mt-6 grid grid-cols-1 gap-y-6 gap-x-4 sm:grid-cols-6">
18
- <div class="sm:col-span-4">
19
- <%= form.label :name, class: "block text-sm font-medium text-gray-700" %>
20
- <div class="mt-1">
21
- <%= form.text_field :name, class: "shadow-sm focus:ring-indigo-500 focus:border-indigo-500 block w-full sm:text-sm border-gray-300 rounded-md" %>
22
- </div>
23
- </div>
24
- <div class="sm:col-span-2">
25
- <%= form.label :dataset_id, class: "block text-sm font-medium text-gray-700" %>
26
- <div class="mt-1">
27
- <%= form.select :dataset_id, options_for_select(@datasets.map { |dataset| [dataset.name, dataset.id] }), class: "shadow-sm focus:ring-indigo-500 focus:border-indigo-500 block w-full sm:text-sm border-gray-300 rounded-md" %>
28
- </div>
29
- </div>
30
- </div>
31
- </div>
32
- </div>
33
- <div class="pt-5">
34
- <%= form.submit class: "btn btn-primary btn-block" %>
1
+ <% content_for :title, "New Prompt" %>
2
+ <div class="container mx-auto px-4 py-8 bg-gray-950 text-white">
3
+ <h1 class="text-3xl font-bold text-indigo-400 mb-6">New Prompt</h1>
4
+ <%= form_with(model: @prompt, url: workbench_index_path, local: true, class: "bg-gray-800 rounded-lg shadow-lg p-6", data: { controller: "prompt-selector" }) do |form| %>
5
+ <% if @prompt.errors.any? %>
6
+ <div class="bg-red-900 border border-red-700 text-red-100 px-4 py-3 rounded-lg mb-4">
7
+ <h2><%= pluralize(@prompt.errors.count, "error") %> prohibited this prompt from being saved:</h2>
8
+ <ul>
9
+ <% @prompt.errors.full_messages.each do |message| %>
10
+ <li><%= message %></li>
11
+ <% end %>
12
+ </ul>
35
13
  </div>
36
14
  <% end %>
37
- </div>
38
- </div>
15
+ <div class="mb-4">
16
+ <%= form.label :name, class: "block text-sm font-semibold mb-2 text-indigo-300" %>
17
+ <%= form.text_field :name, autofocus: true, class: "w-full bg-gray-700 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none" %>
18
+ </div>
19
+ <div class="mb-4">
20
+ <%= form.label :system_prompt, class: "block text-sm font-semibold mb-2 text-indigo-300" %>
21
+ <%= form.text_area :system_prompt, rows: 2, class: "w-full bg-gray-700 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none" %>
22
+ </div>
23
+ <div class="mb-4">
24
+ <%= form.label :predefined_prompt, "Select Predefined Prompt", class: "block text-sm font-semibold mb-2 text-indigo-300" %>
25
+ <%= form.select :predefined_prompt,
26
+ options_for_select([['Custom Prompt', '']] + @predefined_prompts.map { |name, content| [name, content] }),
27
+ {},
28
+ class: "w-full bg-gray-700 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none",
29
+ data: { action: "change->prompt-selector#toggleUserPrompt" }
30
+ %>
31
+ </div>
32
+ <div class="mb-4" data-prompt-selector-target="userPromptField">
33
+ <%= form.label :user_prompt, class: "block text-sm font-semibold mb-2 text-indigo-300" %>
34
+ <%= form.text_area :user_prompt, rows: 5, class: "w-full bg-gray-700 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none" %>
35
+ </div>
36
+ <div class="mb-4 hidden" data-prompt-selector-target="promptPreview">
37
+ <h3 class="text-lg font-semibold mb-2 text-indigo-300">Prompt Preview</h3>
38
+ <div class="bg-gray-700 text-white p-3 rounded-lg" data-prompt-selector-target="previewContent"></div>
39
+ </div>
40
+ <div class="flex items-center justify-end space-x-4">
41
+ <%= link_to "Cancel", workbench_index_path, class: "px-3 py-2 rounded-md text-sm font-medium text-gray-300 hover:bg-gray-800 hover:text-white transition-colors duration-150 ease-in-out" %>
42
+ <%= form.submit "Create Prompt", class: "px-3 py-2 rounded-md text-sm font-medium bg-indigo-600 text-white shadow-lg hover:bg-indigo-700 transition-colors duration-150 ease-in-out" %>
43
+ </div>
44
+ <% end %>
45
+ </div>
46
+ <script>
47
+ (() => {
48
+ const application = Stimulus.Application.start()
49
+
50
+ application.register("prompt-selector", class extends Stimulus.Controller {
51
+ static targets = ["userPromptField", "promptPreview", "previewContent"]
52
+
53
+ toggleUserPrompt(event) {
54
+ const selectedContent = event.target.value
55
+ if (selectedContent) {
56
+ this.userPromptFieldTarget.style.display = 'none'
57
+ this.promptPreviewTarget.classList.remove('hidden')
58
+ this.loadPredefinedPrompt(selectedContent)
59
+ } else {
60
+ this.userPromptFieldTarget.style.display = 'block'
61
+ this.promptPreviewTarget.classList.add('hidden')
62
+ this.clearUserPrompt()
63
+ }
64
+ }
65
+
66
+ loadPredefinedPrompt(content) {
67
+ const userPromptTextarea = this.userPromptFieldTarget.querySelector('textarea')
68
+ userPromptTextarea.value = content
69
+ this.previewContentTarget.innerHTML = marked.parse(content)
70
+ }
71
+
72
+ clearUserPrompt() {
73
+ const userPromptTextarea = this.userPromptFieldTarget.querySelector('textarea')
74
+ userPromptTextarea.value = ''
75
+ this.previewContentTarget.innerHTML = ''
76
+ }
77
+ })
78
+ })()
79
+ </script>
80
+ <!-- Include marked.js for Markdown parsing -->
81
+ <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
data/config/routes.rb CHANGED
@@ -1,12 +1,21 @@
1
1
  Leva::Engine.routes.draw do
2
2
  root 'workbench#index'
3
3
 
4
- resources :datasets
5
- resources :experiments
4
+ resources :datasets do
5
+ resources :dataset_records, path: 'records', only: [:index, :show]
6
+ end
7
+ resources :experiments, except: [:destroy] do
8
+ member do
9
+ post :rerun
10
+ end
11
+ resources :runner_results, only: [:show]
12
+ end
6
13
  resources :prompts
7
- resources :workbench, only: [:index, :new, :show] do
8
- post 'run', on: :collection
9
- post 'run_with_evaluation', on: :collection
10
- post 'run_evaluator', on: :collection
14
+ resources :workbench, only: [:index, :new, :create, :edit, :update] do
15
+ collection do
16
+ post 'run'
17
+ post 'run_all_evals'
18
+ post 'run_evaluator'
19
+ end
11
20
  end
12
21
  end
@@ -2,6 +2,7 @@ class CreateLevaDatasets < ActiveRecord::Migration[7.2]
2
2
  def change
3
3
  create_table :leva_datasets do |t|
4
4
  t.string :name
5
+ t.text :description
5
6
 
6
7
  t.timestamps
7
8
  end
@@ -1,7 +1,7 @@
1
1
  class CreateLevaDatasetRecords < ActiveRecord::Migration[7.2]
2
2
  def change
3
3
  create_table :leva_dataset_records do |t|
4
- t.references :leva_dataset, null: false, foreign_key: true
4
+ t.references :dataset, null: false, foreign_key: { to_table: :leva_datasets }
5
5
  t.references :recordable, polymorphic: true, null: false
6
6
 
7
7
  t.timestamps
@@ -2,8 +2,9 @@ class CreateLevaExperiments < ActiveRecord::Migration[7.2]
2
2
  def change
3
3
  create_table :leva_experiments do |t|
4
4
  t.string :name
5
- t.references :leva_dataset, null: false, foreign_key: true
6
- t.references :leva_prompt, null: true, foreign_key: true
5
+ t.text :description
6
+ t.references :dataset, null: false, foreign_key: { to_table: :leva_datasets }
7
+ t.references :prompt, null: true, foreign_key: { to_table: :leva_prompts }
7
8
  t.integer :status
8
9
  t.text :metadata
9
10
 
@@ -1,8 +1,8 @@
1
1
  class CreateLevaEvaluationResults < ActiveRecord::Migration[7.2]
2
2
  def change
3
3
  create_table :leva_evaluation_results do |t|
4
- t.references :leva_experiment, null: false, foreign_key: true
5
- t.references :leva_dataset_record, null: false, foreign_key: true
4
+ t.references :experiment, null: false, foreign_key: { to_table: :leva_experiments }
5
+ t.references :dataset_record, null: false, foreign_key: { to_table: :leva_dataset_records }
6
6
  t.string :prediction
7
7
  t.float :score
8
8
  t.string :label
@@ -0,0 +1,11 @@
1
+ class CreateLevaRunnerResults < ActiveRecord::Migration[7.2]
2
+ def change
3
+ create_table :leva_runner_results do |t|
4
+ t.references :experiment, null: false, foreign_key: { to_table: :leva_experiments }
5
+ t.references :dataset_record, null: false, foreign_key: { to_table: :leva_dataset_records }
6
+ t.text :prediction
7
+
8
+ t.timestamps
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,8 @@
1
+ class UpdateLevaEvaluationResults < ActiveRecord::Migration[7.2]
2
+ def change
3
+ add_reference :leva_evaluation_results, :runner_result, null: false, foreign_key: { to_table: :leva_runner_results }
4
+ add_column :leva_evaluation_results, :evaluator_class, :string, null: false
5
+ remove_column :leva_evaluation_results, :prediction, :string
6
+ remove_column :leva_evaluation_results, :label, :string
7
+ end
8
+ end
@@ -0,0 +1,6 @@
1
+ class MakeExperimentOptionalForRunnerResults < ActiveRecord::Migration[7.2]
2
+ def change
3
+ change_column_null :leva_runner_results, :experiment_id, true
4
+ change_column_null :leva_evaluation_results, :experiment_id, true
5
+ end
6
+ end
@@ -0,0 +1,5 @@
1
+ class AddPromptToLevaRunnerResults < ActiveRecord::Migration[7.2]
2
+ def change
3
+ add_reference :leva_runner_results, :prompt, null: false, foreign_key: { to_table: :leva_prompts }
4
+ end
5
+ end
@@ -0,0 +1,6 @@
1
+ class AddRunnerAndEvaluatorToLevaExperiments < ActiveRecord::Migration[7.2]
2
+ def change
3
+ add_column :leva_experiments, :runner_class, :string
4
+ add_column :leva_experiments, :evaluator_classes, :text
5
+ end
6
+ end
@@ -0,0 +1,5 @@
1
+ class AddActualResultToLevaDatasetRecords < ActiveRecord::Migration[7.2]
2
+ def change
3
+ add_column :leva_runner_results, :actual_result, :text
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ class RemoveActualResultFromLevaRunnerResults < ActiveRecord::Migration[7.2]
2
+ def change
3
+ remove_column :leva_runner_results, :actual_result, :text
4
+ end
5
+ end
@@ -2,14 +2,13 @@
2
2
 
3
3
  class <%= class_name %>Eval < Leva::BaseEval
4
4
  # @param prediction [String] The prediction to evaluate
5
- # @param record [YourRecordClass] The record to evaluate
6
- # @return [Leva::Result] The result of the evaluation
7
- def evaluate(prediction, record)
5
+ # @param recordable [YourRecordClass] The recordable object to evaluate
6
+ # @return [Float] The score of the evaluation
7
+ def evaluate(prediction, recordable)
8
8
  # Implement your evaluation logic here
9
+ # You can access the ground truth using recordable.ground_truth
9
10
 
10
- Leva::Result.new(
11
- label: "<%= file_name.underscore %>",
12
- score: score
13
- )
11
+ # Example implementation:
12
+ prediction == recordable.ground_truth ? 1.0 : 0.0
14
13
  end
15
14
  end
data/lib/leva/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Leva
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.6"
3
3
  end
data/lib/leva.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require "leva/version"
2
2
  require "leva/engine"
3
+ require "liquid"
3
4
 
4
5
  module Leva
5
6
  # Runs an evaluation experiment with the given run and evals.
@@ -9,9 +10,34 @@ module Leva
9
10
  # @param evals [Array<Leva::BaseEval>] The evaluation implementations to use.
10
11
  # @return [void]
11
12
  def self.run_evaluation(experiment:, run:, evals:)
12
- results = run.run(experiment)
13
+ experiment.update(status: :running)
14
+
15
+ experiment.dataset.dataset_records.find_each do |dataset_record|
16
+ runner_result = run.execute_and_store(experiment, dataset_record, experiment.prompt)
17
+
18
+ evals.each do |eval|
19
+ eval.evaluate_and_store(experiment, runner_result)
20
+ end
21
+ end
22
+
23
+ experiment.update(status: :completed)
24
+ rescue StandardError => e
25
+ experiment.update(status: :failed)
26
+ Rails.logger.error "Error in experiment #{experiment.name}: #{e.message}"
27
+ end
28
+
29
+ # Runs a single evaluation for a dataset record
30
+ #
31
+ # @param experiment [Leva::Experiment] The experiment to run.
32
+ # @param run [Leva::BaseRun] The run implementation to use.
33
+ # @param evals [Array<Leva::BaseEval>] The evaluation implementations to use.
34
+ # @param dataset_record [Leva::DatasetRecord] The dataset record to process.
35
+ # @return [void]
36
+ def self.run_single_evaluation(experiment:, run:, evals:, dataset_record:)
37
+ runner_result = run.execute_and_store(experiment, dataset_record, experiment.prompt)
38
+
13
39
  evals.each do |eval|
14
- eval.evaluate_all(experiment, results)
40
+ eval.evaluate_and_store(experiment, runner_result)
15
41
  end
16
42
  end
17
43
 
@@ -29,20 +55,24 @@ module Leva
29
55
  raise NotImplementedError, "#{self.class} must implement #execute"
30
56
  end
31
57
 
32
- # Runs the model on all records in an experiment.
58
+ # Executes the run on a given dataset record and stores the result.
33
59
  #
34
- # @param experiment [Leva::Experiment] The experiment to run.
35
- # @return [Hash] A hash mapping dataset_record_ids to their execution results.
36
- def run(experiment)
60
+ # @param experiment [Leva::Experiment, nil] The experiment being run, if any.
61
+ # @param dataset_record [Leva::DatasetRecord] The dataset record to run the model on.
62
+ # @param prompt [Leva::Prompt] The prompt to store the version of.
63
+ # @return [Leva::RunnerResult] The stored runner result.
64
+ def execute_and_store(experiment, dataset_record, prompt)
65
+ # Expose these to the subclass execution
37
66
  @experiment = experiment
38
- @prompt = experiment.prompt
67
+ @prompt = prompt
39
68
 
40
- results = {}
41
- experiment.dataset.dataset_records.find_each do |dataset_record|
42
- result = execute(dataset_record.recordable)
43
- results[dataset_record.id] = result
44
- end
45
- results
69
+ result = execute(dataset_record.recordable)
70
+ RunnerResult.create!(
71
+ experiment: experiment,
72
+ dataset_record: dataset_record,
73
+ prompt: prompt,
74
+ prediction: result,
75
+ )
46
76
  end
47
77
  end
48
78
 
@@ -51,47 +81,34 @@ module Leva
51
81
  # @abstract Subclass and override {#evaluate} to implement
52
82
  # custom evaluation logic.
53
83
  class BaseEval
54
- # Evaluates the model's prediction against the expected result.
84
+ # Evaluates the model's prediction against the ground truth.
55
85
  #
56
86
  # @param prediction [Object] The model's prediction.
57
- # @param record [Object] The expected result.
58
- # @return [Leva::Result] The evaluation result.
87
+ # @param recordable [Object] The recordable object containing the ground truth.
88
+ # @return [Float] The evaluation score.
59
89
  # @raise [NotImplementedError] if the method is not implemented in a subclass.
60
- def evaluate(prediction, record)
90
+ def evaluate(prediction, recordable)
61
91
  raise NotImplementedError, "#{self.class} must implement #evaluate"
62
92
  end
63
93
 
64
- # Evaluates all results for an experiment.
94
+ # Evaluates a single runner result and stores the evaluation.
65
95
  #
66
- # @param experiment [Leva::Experiment] The experiment to evaluate.
67
- # @param results [Hash] A hash mapping dataset_record_ids to their execution results.
68
- # @return [void]
69
- def evaluate_all(experiment, results)
70
- experiment.dataset.dataset_records.find_each do |dataset_record|
71
- prediction = results[dataset_record.id]
72
- evaluation = evaluate(prediction, dataset_record.recordable)
73
-
74
- Leva::EvaluationResult.create!(
75
- experiment: experiment,
76
- dataset_record: dataset_record,
77
- prediction: prediction,
78
- score: evaluation.score,
79
- label: evaluation.label
80
- )
81
- end
82
- end
83
- end
96
+ # @param experiment [Leva::Experiment, nil] The experiment being evaluated, if any.
97
+ # @param runner_result [Leva::RunnerResult] The runner result to evaluate.
98
+ # @return [Leva::EvaluationResult] The stored evaluation result.
99
+ def evaluate_and_store(experiment, runner_result)
100
+ @experiment = experiment
101
+ @runner_result = runner_result
84
102
 
85
- # Represents the result of an evaluation
86
- class Result
87
- attr_reader :label, :prediction, :score
103
+ score = evaluate(runner_result, runner_result.dataset_record.recordable)
88
104
 
89
- # Initialize a new Result
90
- # @param label [String] The label for the result
91
- # @param score [Float] The score of the evaluation (0.0 to 1.0)
92
- def initialize(label:, score:)
93
- @label = label
94
- @score = score
105
+ EvaluationResult.create!(
106
+ experiment: experiment,
107
+ dataset_record: runner_result.dataset_record,
108
+ runner_result: runner_result,
109
+ score: score,
110
+ evaluator_class: self.class.name
111
+ )
95
112
  end
96
113
  end
97
114
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: leva
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kieran Klaassen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-14 00:00:00.000000000 Z
11
+ date: 2024-09-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: 7.2.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: liquid
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 5.5.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 5.5.0
27
41
  description: Leva is a Ruby on Rails framework for evaluating Language Models (LLMs)
28
42
  using ActiveRecord datasets. It provides a flexible structure for creating experiments,
29
43
  managing datasets, and implementing various evaluation logic.
@@ -39,26 +53,49 @@ files:
39
53
  - app/assets/config/leva_manifest.js
40
54
  - app/assets/stylesheets/leva/application.css
41
55
  - app/controllers/leva/application_controller.rb
56
+ - app/controllers/leva/dataset_records_controller.rb
42
57
  - app/controllers/leva/datasets_controller.rb
43
58
  - app/controllers/leva/experiments_controller.rb
59
+ - app/controllers/leva/runner_results_controller.rb
44
60
  - app/controllers/leva/workbench_controller.rb
45
- - app/evals/test_sentiment_accuracy_eval.rb
46
61
  - app/helpers/leva/application_helper.rb
62
+ - app/javascript/controllers/prompt_form_controller.js
63
+ - app/javascript/controllers/prompt_selector_controller.js
47
64
  - app/jobs/leva/application_job.rb
48
65
  - app/jobs/leva/experiment_job.rb
66
+ - app/jobs/leva/run_eval_job.rb
49
67
  - app/mailers/leva/application_mailer.rb
68
+ - app/models/concerns/leva/recordable.rb
50
69
  - app/models/leva/application_record.rb
51
70
  - app/models/leva/dataset.rb
52
71
  - app/models/leva/dataset_record.rb
53
72
  - app/models/leva/evaluation_result.rb
54
73
  - app/models/leva/experiment.rb
55
74
  - app/models/leva/prompt.rb
56
- - app/runners/test_sentiment_run.rb
75
+ - app/models/leva/runner_result.rb
57
76
  - app/views/layouts/leva/application.html.erb
77
+ - app/views/leva/dataset_records/index.html.erb
78
+ - app/views/leva/dataset_records/show.html.erb
79
+ - app/views/leva/datasets/_dataset.html.erb
80
+ - app/views/leva/datasets/_form.html.erb
81
+ - app/views/leva/datasets/edit.html.erb
58
82
  - app/views/leva/datasets/index.html.erb
83
+ - app/views/leva/datasets/new.html.erb
59
84
  - app/views/leva/datasets/show.html.erb
85
+ - app/views/leva/experiments/_experiment.html.erb
86
+ - app/views/leva/experiments/_form.html.erb
87
+ - app/views/leva/experiments/edit.html.erb
60
88
  - app/views/leva/experiments/index.html.erb
89
+ - app/views/leva/experiments/new.html.erb
61
90
  - app/views/leva/experiments/show.html.erb
91
+ - app/views/leva/runner_results/show.html.erb
92
+ - app/views/leva/workbench/_evaluation_area.html.erb
93
+ - app/views/leva/workbench/_prompt_content.html.erb
94
+ - app/views/leva/workbench/_prompt_form.html.erb
95
+ - app/views/leva/workbench/_prompt_sidebar.html.erb
96
+ - app/views/leva/workbench/_results_section.html.erb
97
+ - app/views/leva/workbench/_top_bar.html.erb
98
+ - app/views/leva/workbench/edit.html.erb
62
99
  - app/views/leva/workbench/index.html.erb
63
100
  - app/views/leva/workbench/new.html.erb
64
101
  - config/routes.rb
@@ -67,12 +104,18 @@ files:
67
104
  - db/migrate/20240813173034_create_leva_prompts.rb
68
105
  - db/migrate/20240813173035_create_leva_experiments.rb
69
106
  - db/migrate/20240813173050_create_leva_evaluation_results.rb
107
+ - db/migrate/20240816201419_create_leva_runner_results.rb
108
+ - db/migrate/20240816201433_update_leva_evaluation_results.rb
109
+ - db/migrate/20240821163608_make_experiment_optional_for_runner_results.rb
110
+ - db/migrate/20240821181934_add_prompt_to_leva_runner_results.rb
111
+ - db/migrate/20240821183153_add_runner_and_evaluator_to_leva_experiments.rb
112
+ - db/migrate/20240821191713_add_actual_result_to_leva_dataset_records.rb
113
+ - db/migrate/20240822143201_remove_actual_result_from_leva_runner_results.rb
70
114
  - lib/generators/leva/eval_generator.rb
71
115
  - lib/generators/leva/runner_generator.rb
72
116
  - lib/generators/leva/templates/eval.rb.erb
73
117
  - lib/generators/leva/templates/runner.rb.erb
74
118
  - lib/leva.rb
75
- - lib/leva/base_eval.rb
76
119
  - lib/leva/engine.rb
77
120
  - lib/leva/version.rb
78
121
  - lib/tasks/auto_annotate_models.rake
@@ -1,6 +0,0 @@
1
- class TestSentimentAccuracyEval < Leva::BaseEval
2
- def evaluate(prediction, expected)
3
- score = prediction == expected ? 1.0 : 0.0
4
- Leva::Result.new(label: 'sentiment_accuracy', score: score)
5
- end
6
- end
@@ -1,13 +0,0 @@
1
- class TestSentimentRun < Leva::BaseRun
2
- def execute(record)
3
- # Simple sentiment analysis logic for testing
4
- case record.content.downcase
5
- when /love|great|excellent/
6
- "Positive"
7
- when /terrible|bad|awful/
8
- "Negative"
9
- else
10
- "Neutral"
11
- end
12
- end
13
- end
@@ -1,75 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Leva
4
- class BaseEval
5
- class << self
6
- attr_reader :dataset_record_class_name
7
-
8
- # Set the dataset record class for the eval
9
- # @param class_name [String] The name of the dataset record class
10
- def leva_dataset_record_class(class_name)
11
- @dataset_record_class_name = class_name
12
- end
13
-
14
- # Run the experiment
15
- # @param experiment [Leva::Experiment] The experiment to run
16
- def run_experiment(experiment)
17
- new.run_experiment(experiment)
18
- end
19
- end
20
-
21
- # Run the experiment
22
- # @param experiment [Leva::Experiment] The experiment to run
23
- def run_experiment(experiment)
24
- @experiment = experiment
25
- @experiment.update(status: :running)
26
-
27
- @experiment.dataset.records.each do |record|
28
- @record = record
29
- unless @record.class_name == self.class.dataset_record_class_name
30
- raise ArgumentError, "Record class #{@record.class_name} does not match expected class #{self.class.dataset_record_class_name}"
31
- end
32
- ExperimentJob.perform_later(self, @record)
33
- end
34
-
35
- @experiment.update(status: :completed)
36
- rescue StandardError => e
37
- @experiment.update(status: :failed)
38
- Rails.logger.error "Error in experiment #{@experiment.name}: #{e.message}"
39
- end
40
-
41
- # Run the evaluation for a single record
42
- # @param record [ActiveRecord::Base] The record to evaluate
43
- # @return [Leva::Result] The result of the evaluation
44
- def run_each(record)
45
- raise NotImplementedError, "Subclasses must implement the 'run_each' method"
46
- end
47
-
48
- # Save the result of an evaluation
49
- # @param result [Leva::Result] The result of the evaluation
50
- def save_result(result)
51
- Leva::EvaluationResult.create!(
52
- experiment: @experiment,
53
- dataset_record: Leva::DatasetRecord.find_by(recordable: @record, dataset: @experiment.dataset),
54
- prediction: result.prediction,
55
- score: result.score,
56
- label: result.label
57
- )
58
- end
59
- end
60
-
61
- # Represents the result of an evaluation
62
- class Result
63
- attr_reader :label, :prediction, :score
64
-
65
- # Initialize a new Result
66
- # @param label [String] The label for the result
67
- # @param prediction [String] The prediction made by the evaluation
68
- # @param score [Float] The score of the evaluation (0.0 to 1.0)
69
- def initialize(label:, prediction:, score:)
70
- @label = label
71
- @prediction = prediction
72
- @score = score
73
- end
74
- end
75
- end