leva 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2abd3b8bab0e39683850e7e95df8815268332022c2adf6846cd27442e880cb88
4
- data.tar.gz: 73c430d4a5a2c6a98dba70a36be988dd40541debbf10b274d62db8e232e931f7
3
+ metadata.gz: 9d8e48a71a34a53451bc1756a6cdc7694ee6ea9747861fdb977b0859979bd101
4
+ data.tar.gz: 83c44769f588b3daeeaa6bd750f9e9b643639871de57dd381cd84a76cd6177d6
5
5
  SHA512:
6
- metadata.gz: ee31e67dae95204cb6c6f2fc96ad41dae624ea48f6760b65571f8f1447fd24ca055a4dba33244067f0138db79a620ec82e8bbb3d3fd4363f9b6e98079b45b6a7
7
- data.tar.gz: fadb2838c4d31e498f40ed8e5d20859c57e4e2df433fae1251d3215ddcbc3a2c4843b5a7bd0b73bb4ff4e2b3abb55842b83d7a018ad7feaa40a4da89e6313adc
6
+ metadata.gz: 74ad608263e8fe369693537d76247757bd39417bbf1e034ffc2402684a9800ed25803d1ff3506d05776c9c523ad7fd84254e78290fc99f21f7180f67e1e12667
7
+ data.tar.gz: 3a2e4b9701d6cb63c05c2f672b44c8bfd16f0de5a6a98c63e4b6a13de6685efe001cf320304be1fa1aa6c6b56cec658ec0fb95e731fbf078055d5deaffb10959
data/README.md CHANGED
@@ -1,7 +1,12 @@
1
1
  # Leva - Flexible Evaluation Framework for Language Models
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/leva.svg)](https://badge.fury.io/rb/leva)
4
+
3
5
  Leva is a Ruby on Rails framework for evaluating Large Language Models (LLMs) using ActiveRecord datasets on production models. It provides a flexible structure for creating experiments, managing datasets, and implementing various evaluation logic on production data with security in mind.
4
6
 
7
+ ![Leva - Workbench- Google Chrome](https://github.com/user-attachments/assets/ee487941-e11b-4c2a-983b-771ef27dd73c)
8
+ ![Leva - rty- Google Chrome](https://github.com/user-attachments/assets/f9986a12-731b-4747-9f86-5ac6fffd5cbc)
9
+
5
10
  ## Installation
6
11
 
7
12
  Add this line to your application's Gemfile:
@@ -13,3 +13,16 @@
13
13
  *= require_tree .
14
14
  *= require_self
15
15
  */
16
+
17
+ /* Global styles for text overflow handling */
18
+ pre {
19
+ word-wrap: break-word;
20
+ word-break: break-word;
21
+ max-width: 100%;
22
+ }
23
+
24
+ textarea {
25
+ word-wrap: break-word;
26
+ word-break: break-word;
27
+ max-width: 100%;
28
+ }
@@ -3,6 +3,7 @@
3
3
  # Table name: leva_dataset_records
4
4
  #
5
5
  # id :integer not null, primary key
6
+ # actual_result :text
6
7
  # recordable_type :string not null
7
8
  # created_at :datetime not null
8
9
  # updated_at :datetime not null
@@ -61,4 +62,4 @@ module Leva
61
62
  end
62
63
  end
63
64
  end
64
- end
65
+ end
@@ -5,6 +5,7 @@
5
5
  # id :integer not null, primary key
6
6
  # prediction :text
7
7
  # prompt_version :integer
8
+ # runner_class :string
8
9
  # created_at :datetime not null
9
10
  # updated_at :datetime not null
10
11
  # dataset_record_id :integer not null
@@ -32,23 +33,24 @@ module Leva
32
33
 
33
34
  validates :prediction, presence: true
34
35
  validates :prompt, presence: true
36
+ validates :runner_class, presence: true
35
37
 
36
38
  delegate :ground_truth, to: :dataset_record
37
39
 
38
40
  # @return [Array<String>] The parsed predictions
39
41
  def parsed_predictions
40
- @parsed_predictions ||=
41
- if extract_regex_pattern
42
- prediction.scan(extract_regex_pattern).map { |match| match.first&.strip }.compact
43
- else
44
- [prediction]
45
- end
42
+ @parsed_predictions ||= runner&.parsed_predictions(self) || []
43
+ end
44
+
45
+ # @return [String] The ground truth for this runner result
46
+ def ground_truth
47
+ @ground_truth ||= runner&.ground_truth(self)
46
48
  end
47
49
 
48
50
  private
49
51
 
50
- def extract_regex_pattern
51
- dataset_record.recordable.extract_regex_pattern
52
+ def runner
53
+ @runner ||= runner_class&.constantize&.new
52
54
  end
53
55
  end
54
- end
56
+ end
@@ -12,7 +12,7 @@
12
12
  </button>
13
13
  </div>
14
14
  <textarea
15
- class="w-full bg-gray-800 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none min-h-[100px] overflow-hidden resize-none"
15
+ class="w-full bg-gray-800 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none min-h-[100px] overflow-y-auto resize-none break-words"
16
16
  name="prompt[system_prompt]"
17
17
  data-prompt-autosave-target="input"
18
18
  id="systemPrompt"
@@ -31,7 +31,7 @@
31
31
  </button>
32
32
  </div>
33
33
  <textarea
34
- class="w-full bg-gray-800 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none min-h-[200px] overflow-hidden resize-none"
34
+ class="w-full bg-gray-800 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none min-h-[200px] overflow-y-auto resize-none break-words"
35
35
  name="prompt[user_prompt]"
36
36
  data-prompt-autosave-target="input"
37
37
  id="userPrompt"
@@ -59,7 +59,7 @@
59
59
  Copy
60
60
  </button>
61
61
  </summary>
62
- <pre class="text-xs text-gray-300 mt-1 whitespace-pre-wrap" id="liquidTag<%= key %>"><%= value.to_s %></pre>
62
+ <pre class="text-xs text-gray-300 mt-1 whitespace-pre-wrap break-words overflow-x-auto max-w-full" id="liquidTag<%= key %>"><%= value.to_s %></pre>
63
63
  </details>
64
64
  <% end %>
65
65
  </div>
@@ -77,7 +77,7 @@
77
77
  Copy
78
78
  </button>
79
79
  </div>
80
- <pre class="w-full bg-gray-800 text-white p-3 rounded-lg text-sm whitespace-pre-wrap" id="fullPrompt"><%= Liquid::Template.parse(@selected_prompt.user_prompt).render(@dataset_record.recordable.to_llm_context.stringify_keys) %></pre>
80
+ <pre class="w-full bg-gray-800 text-white p-3 rounded-lg text-sm whitespace-pre-wrap overflow-x-auto break-words max-w-full" id="fullPrompt"><%= Liquid::Template.parse(@selected_prompt.user_prompt).render(@dataset_record.recordable.to_llm_context.stringify_keys) %></pre>
81
81
  </div>
82
82
  <% end %>
83
83
  <div class="text-sm text-center" data-prompt-autosave-target="status"></div>
@@ -102,7 +102,11 @@
102
102
  const textareas = textarea ? [textarea] : this.inputTargets
103
103
  textareas.forEach(ta => {
104
104
  ta.style.height = 'auto'
105
- ta.style.height = ta.scrollHeight + 'px'
105
+ ta.style.height = (ta.scrollHeight + 5) + 'px'
106
+
107
+ // Ensure horizontal text wrapping
108
+ ta.style.wordBreak = 'break-word'
109
+ ta.style.wordWrap = 'break-word'
106
110
  })
107
111
  }
108
112
 
@@ -1,4 +1,4 @@
1
- <div class="w-1/2 bg-gray-900 border-l border-gray-800 p-5 overflow-y-auto" data-controller="button-loader">
1
+ <div class="w-1/2 bg-gray-900 border-l border-gray-800 p-5 overflow-y-auto overflow-x-hidden" data-controller="button-loader">
2
2
  <!-- Runner Dropdown -->
3
3
  <div class="mb-5">
4
4
  <h3 class="text-sm font-semibold mb-2 text-indigo-300">Select Runner</h3>
@@ -42,17 +42,17 @@
42
42
  <% if @dataset_record && (runner_result = @dataset_record.runner_results.last) %>
43
43
  <div class="mb-3">
44
44
  <h4 class="text-xs font-semibold text-indigo-200 mb-1">Ground Truth:</h4>
45
- <pre class="text-sm text-gray-300 whitespace-pre-wrap bg-gray-700 p-2 rounded"><%= @dataset_record.ground_truth %></pre>
45
+ <pre class="text-sm text-gray-300 whitespace-pre-wrap break-words overflow-x-auto max-w-full bg-gray-700 p-2 rounded"><%= runner_result.ground_truth %></pre>
46
46
  </div>
47
47
  <div>
48
48
  <h4 class="text-xs font-semibold text-indigo-200 mb-1">Raw Prediction:</h4>
49
- <pre class="text-sm text-gray-300 whitespace-pre-wrap bg-gray-700 p-2 rounded"><%= runner_result.prediction %></pre>
49
+ <pre class="text-sm text-gray-300 whitespace-pre-wrap break-words overflow-x-auto max-w-full bg-gray-700 p-2 rounded"><%= runner_result.prediction %></pre>
50
50
  </div>
51
51
  <% if runner_result.dataset_record.recordable.extract_regex_pattern %>
52
52
  <div>
53
53
  <h4 class="text-xs font-semibold text-indigo-200 my-2 gap-2">Parsed Predictions: <%= runner_result.dataset_record.recordable.extract_regex_pattern.to_s %></h4>
54
54
  <% runner_result.parsed_predictions.each do |prediction| %>
55
- <pre class="text-sm text-gray-300 whitespace-pre-wrap bg-gray-700 p-2 rounded mb-2"><%= prediction %></pre>
55
+ <pre class="text-sm text-gray-300 whitespace-pre-wrap break-words overflow-x-auto max-w-full bg-gray-700 p-2 rounded mb-2"><%= prediction %></pre>
56
56
  <% end %>
57
57
  </div>
58
58
  <% end %>
@@ -0,0 +1,5 @@
1
+ class AddRunnerClassToLevaRunnerResults < ActiveRecord::Migration[7.2]
2
+ def change
3
+ add_column :leva_runner_results, :runner_class, :string
4
+ end
5
+ end
@@ -1,14 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class <%= class_name %>Eval < Leva::BaseEval
4
- # @param prediction [String] The prediction to evaluate
4
+ # @param runner_result [Leva::RunnerResult] The runner result to evaluate
5
5
  # @param recordable [YourRecordClass] The recordable object to evaluate
6
6
  # @return [Float] The score of the evaluation
7
- def evaluate(prediction, recordable)
7
+ def evaluate(runner_result, recordable)
8
8
  # Implement your evaluation logic here
9
9
  # You can access the ground truth using recordable.ground_truth
10
10
 
11
11
  # Example implementation:
12
- prediction == recordable.ground_truth ? 1.0 : 0.0
12
+ runner_result.parsed_predictions.first == recordable.ground_truth ? 1.0 : 0.0
13
13
  end
14
14
  end
@@ -8,4 +8,29 @@ class <%= class_name %>Run < Leva::BaseRun
8
8
  # This could involve calling an API, running a local model, etc.
9
9
  # Return the result of the run to be used to evaluate the model
10
10
  end
11
+
12
+ # Uncomment and modify this method to customize parsed predictions
13
+ # @param runner_result [Leva::RunnerResult] The runner result to parse
14
+ # @return [Array<String>] The parsed predictions
15
+ # def parsed_predictions(runner_result)
16
+ # # Example: Extract predictions from XML-like tags
17
+ # runner_result.prediction.scan(/<prediction>(.*?)<\/prediction>/).flatten
18
+ # end
19
+
20
+ # Uncomment and modify this method to customize ground truth extraction
21
+ # @param runner_result [Leva::RunnerResult] The runner result to get ground truth from
22
+ # @return [String] The ground truth for the runner result
23
+ # def ground_truth(runner_result)
24
+ # # Example: Extract ground truth from a specific field
25
+ # runner_result.dataset_record.recordable.custom_ground_truth_field
26
+ # end
27
+
28
+ # Uncomment and modify this method to customize regex extraction
29
+ # @param runner_result [Leva::RunnerResult] The runner result to extract regex from
30
+ # @return [Regexp, nil] The regex pattern to use for parsing predictions
31
+ # def extract_regex_pattern(runner_result)
32
+ # # Your custom regex extraction logic here
33
+ # # For example:
34
+ # # /\<result\>(.*?)\<\/result\>/
35
+ # end
11
36
  end
data/lib/leva/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Leva
2
- VERSION = "0.1.6"
2
+ VERSION = "0.1.8"
3
3
  end
data/lib/leva.rb CHANGED
@@ -72,8 +72,31 @@ module Leva
72
72
  dataset_record: dataset_record,
73
73
  prompt: prompt,
74
74
  prediction: result,
75
+ runner_class: self.class.name
75
76
  )
76
77
  end
78
+
79
+ # @param runner_result [Leva::RunnerResult] The runner result to parse
80
+ # @return [Array<String>] The parsed predictions
81
+ def parsed_predictions(runner_result)
82
+ if extract_regex_pattern(runner_result)
83
+ runner_result.prediction.scan(extract_regex_pattern(runner_result)).map { |match| match.first&.strip }.compact
84
+ else
85
+ [runner_result.prediction]
86
+ end
87
+ end
88
+
89
+ # @param runner_result [Leva::RunnerResult] The runner result to extract regex from
90
+ # @return [Regexp, nil] The regex pattern to use for parsing predictions
91
+ def extract_regex_pattern(runner_result)
92
+ runner_result.dataset_record.recordable.extract_regex_pattern if runner_result.dataset_record.recordable.respond_to?(:extract_regex_pattern)
93
+ end
94
+
95
+ # @param runner_result [Leva::RunnerResult] The runner result to get ground truth from
96
+ # @return [String] The ground truth for the runner result
97
+ def ground_truth(runner_result)
98
+ runner_result.dataset_record.ground_truth
99
+ end
77
100
  end
78
101
 
79
102
  # Base class for all evaluation implementations in Leva.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: leva
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kieran Klaassen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-09-03 00:00:00.000000000 Z
11
+ date: 2025-03-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -111,6 +111,7 @@ files:
111
111
  - db/migrate/20240821183153_add_runner_and_evaluator_to_leva_experiments.rb
112
112
  - db/migrate/20240821191713_add_actual_result_to_leva_dataset_records.rb
113
113
  - db/migrate/20240822143201_remove_actual_result_from_leva_runner_results.rb
114
+ - db/migrate/20240912183556_add_runner_class_to_leva_runner_results.rb
114
115
  - lib/generators/leva/eval_generator.rb
115
116
  - lib/generators/leva/runner_generator.rb
116
117
  - lib/generators/leva/templates/eval.rb.erb