leva 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -1
- data/app/assets/stylesheets/leva/application.css +13 -0
- data/app/views/layouts/leva/application.html.erb +2 -1
- data/app/views/leva/workbench/_prompt_content.html.erb +9 -5
- data/app/views/leva/workbench/_results_section.html.erb +4 -4
- data/app/views/leva/workbench/new.html.erb +74 -3
- data/lib/leva/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e16081b74e57cb4f198b075e76bc5fc1c07f67e4a14c4d38e27f8df53e26f3cf
|
4
|
+
data.tar.gz: db9725fdc26fe3542ba8a1b1aaf2f22c2471e112c841224ce3587d1b6e83269c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e1019c0ec416f40854b5b9ef7c4e38df0141c98c5684f01ed26ea145deba92151a0ed908936768819eb1ab11167f2e5b9e075389ef8e8b25fb84786c1571f64d
|
7
|
+
data.tar.gz: 9de9c5e4ee512a66c8b6a72f5b5d8d7b6b316ddf4be13652df640433a40bf6a06b207cce38ef4bc8631619880d9587f3318a77a8722cee394e617c8fc2995039
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Leva - Flexible Evaluation Framework for Language Models
|
2
2
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/leva.svg)](https://badge.fury.io/rb/leva)
|
4
|
+
|
3
5
|
Leva is a Ruby on Rails framework for evaluating Language Models (LLMs) using ActiveRecord datasets on production models. It provides a flexible structure for creating experiments, managing datasets, and implementing various evaluation logic on production data with security in mind.
|
4
6
|
|
5
7
|

|
@@ -194,4 +196,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
194
196
|
|
195
197
|
## Roadmap
|
196
198
|
|
197
|
-
- [ ] Parallelize evaluation
|
199
|
+
- [x] Parallelize evaluation
|
@@ -13,3 +13,16 @@
|
|
13
13
|
*= require_tree .
|
14
14
|
*= require_self
|
15
15
|
*/
|
16
|
+
|
17
|
+
/* Global styles for text overflow handling */
|
18
|
+
pre {
|
19
|
+
word-wrap: break-word;
|
20
|
+
word-break: break-word;
|
21
|
+
max-width: 100%;
|
22
|
+
}
|
23
|
+
|
24
|
+
textarea {
|
25
|
+
word-wrap: break-word;
|
26
|
+
word-break: break-word;
|
27
|
+
max-width: 100%;
|
28
|
+
}
|
@@ -8,6 +8,7 @@
|
|
8
8
|
<%= csp_meta_tag %>
|
9
9
|
<script src="https://cdn.tailwindcss.com"></script>
|
10
10
|
<script src="https://cdn.jsdelivr.net/npm/stimulus@3.2.2/dist/stimulus.umd.min.js"></script>
|
11
|
+
<script src="<%= asset_path 'custom/prompt_preview.js' %>"></script>
|
11
12
|
<%= yield(:head) %>
|
12
13
|
</head>
|
13
14
|
<body class="bg-gray-950 text-white">
|
@@ -38,4 +39,4 @@
|
|
38
39
|
<%= yield %>
|
39
40
|
</main>
|
40
41
|
</body>
|
41
|
-
</html>
|
42
|
+
</html>
|
@@ -12,7 +12,7 @@
|
|
12
12
|
</button>
|
13
13
|
</div>
|
14
14
|
<textarea
|
15
|
-
class="w-full bg-gray-800 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none min-h-[100px] overflow-
|
15
|
+
class="w-full bg-gray-800 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none min-h-[100px] overflow-y-auto resize-none break-words"
|
16
16
|
name="prompt[system_prompt]"
|
17
17
|
data-prompt-autosave-target="input"
|
18
18
|
id="systemPrompt"
|
@@ -31,7 +31,7 @@
|
|
31
31
|
</button>
|
32
32
|
</div>
|
33
33
|
<textarea
|
34
|
-
class="w-full bg-gray-800 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none min-h-[200px] overflow-
|
34
|
+
class="w-full bg-gray-800 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none min-h-[200px] overflow-y-auto resize-none break-words"
|
35
35
|
name="prompt[user_prompt]"
|
36
36
|
data-prompt-autosave-target="input"
|
37
37
|
id="userPrompt"
|
@@ -59,7 +59,7 @@
|
|
59
59
|
Copy
|
60
60
|
</button>
|
61
61
|
</summary>
|
62
|
-
<pre class="text-xs text-gray-300 mt-1 whitespace-pre-wrap" id="liquidTag<%= key %>"><%= value.to_s %></pre>
|
62
|
+
<pre class="text-xs text-gray-300 mt-1 whitespace-pre-wrap break-words overflow-x-auto max-w-full" id="liquidTag<%= key %>"><%= value.to_s %></pre>
|
63
63
|
</details>
|
64
64
|
<% end %>
|
65
65
|
</div>
|
@@ -77,7 +77,7 @@
|
|
77
77
|
Copy
|
78
78
|
</button>
|
79
79
|
</div>
|
80
|
-
<pre class="w-full bg-gray-800 text-white p-3 rounded-lg text-sm whitespace-pre-wrap" id="fullPrompt"><%= Liquid::Template.parse(@selected_prompt.user_prompt).render(@dataset_record.recordable.to_llm_context.stringify_keys) %></pre>
|
80
|
+
<pre class="w-full bg-gray-800 text-white p-3 rounded-lg text-sm whitespace-pre-wrap overflow-x-auto break-words max-w-full" id="fullPrompt"><%= Liquid::Template.parse(@selected_prompt.user_prompt).render(@dataset_record.recordable.to_llm_context.stringify_keys) %></pre>
|
81
81
|
</div>
|
82
82
|
<% end %>
|
83
83
|
<div class="text-sm text-center" data-prompt-autosave-target="status"></div>
|
@@ -102,7 +102,11 @@
|
|
102
102
|
const textareas = textarea ? [textarea] : this.inputTargets
|
103
103
|
textareas.forEach(ta => {
|
104
104
|
ta.style.height = 'auto'
|
105
|
-
ta.style.height = ta.scrollHeight + 'px'
|
105
|
+
ta.style.height = (ta.scrollHeight + 5) + 'px'
|
106
|
+
|
107
|
+
// Ensure horizontal text wrapping
|
108
|
+
ta.style.wordBreak = 'break-word'
|
109
|
+
ta.style.wordWrap = 'break-word'
|
106
110
|
})
|
107
111
|
}
|
108
112
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
<div class="w-1/2 bg-gray-900 border-l border-gray-800 p-5 overflow-y-auto" data-controller="button-loader">
|
1
|
+
<div class="w-1/2 bg-gray-900 border-l border-gray-800 p-5 overflow-y-auto overflow-x-hidden" data-controller="button-loader">
|
2
2
|
<!-- Runner Dropdown -->
|
3
3
|
<div class="mb-5">
|
4
4
|
<h3 class="text-sm font-semibold mb-2 text-indigo-300">Select Runner</h3>
|
@@ -42,17 +42,17 @@
|
|
42
42
|
<% if @dataset_record && (runner_result = @dataset_record.runner_results.last) %>
|
43
43
|
<div class="mb-3">
|
44
44
|
<h4 class="text-xs font-semibold text-indigo-200 mb-1">Ground Truth:</h4>
|
45
|
-
<pre class="text-sm text-gray-300 whitespace-pre-wrap bg-gray-700 p-2 rounded"><%= runner_result.ground_truth %></pre>
|
45
|
+
<pre class="text-sm text-gray-300 whitespace-pre-wrap break-words overflow-x-auto max-w-full bg-gray-700 p-2 rounded"><%= runner_result.ground_truth %></pre>
|
46
46
|
</div>
|
47
47
|
<div>
|
48
48
|
<h4 class="text-xs font-semibold text-indigo-200 mb-1">Raw Prediction:</h4>
|
49
|
-
<pre class="text-sm text-gray-300 whitespace-pre-wrap bg-gray-700 p-2 rounded"><%= runner_result.prediction %></pre>
|
49
|
+
<pre class="text-sm text-gray-300 whitespace-pre-wrap break-words overflow-x-auto max-w-full bg-gray-700 p-2 rounded"><%= runner_result.prediction %></pre>
|
50
50
|
</div>
|
51
51
|
<% if runner_result.dataset_record.recordable.extract_regex_pattern %>
|
52
52
|
<div>
|
53
53
|
<h4 class="text-xs font-semibold text-indigo-200 my-2 gap-2">Parsed Predictions: <%= runner_result.dataset_record.recordable.extract_regex_pattern.to_s %></h4>
|
54
54
|
<% runner_result.parsed_predictions.each do |prediction| %>
|
55
|
-
<pre class="text-sm text-gray-300 whitespace-pre-wrap bg-gray-700 p-2 rounded mb-2"><%= prediction %></pre>
|
55
|
+
<pre class="text-sm text-gray-300 whitespace-pre-wrap break-words overflow-x-auto max-w-full bg-gray-700 p-2 rounded mb-2"><%= prediction %></pre>
|
56
56
|
<% end %>
|
57
57
|
</div>
|
58
58
|
<% end %>
|
@@ -33,9 +33,30 @@
|
|
33
33
|
<%= form.label :user_prompt, class: "block text-sm font-semibold mb-2 text-indigo-300" %>
|
34
34
|
<%= form.text_area :user_prompt, rows: 5, class: "w-full bg-gray-700 text-white p-3 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:outline-none" %>
|
35
35
|
</div>
|
36
|
-
<div class="mb-4 hidden" data-prompt-selector-target="promptPreview">
|
36
|
+
<div class="mb-4 hidden" data-prompt-selector-target="promptPreview" id="prompt-preview">
|
37
37
|
<h3 class="text-lg font-semibold mb-2 text-indigo-300">Prompt Preview</h3>
|
38
|
-
<div
|
38
|
+
<div
|
39
|
+
class="bg-gray-700 text-white p-3 rounded-lg whitespace-pre-wrap"
|
40
|
+
data-prompt-selector-target="previewContent"
|
41
|
+
id="preview-content"
|
42
|
+
></div>
|
43
|
+
|
44
|
+
<!-- show-full button, hidden until overflow is detected -->
|
45
|
+
<button
|
46
|
+
id="show-full-preview"
|
47
|
+
type="button"
|
48
|
+
class="mt-2 text-indigo-400 underline hidden"
|
49
|
+
>Show full</button>
|
50
|
+
|
51
|
+
<!-- HTML5 dialog for full text -->
|
52
|
+
<dialog id="full-preview-dialog" class="p-0 m-0">
|
53
|
+
<div class="bg-gray-800 text-white p-4 rounded-lg max-h-[80vh] overflow-auto whitespace-pre-wrap">
|
54
|
+
<div id="dialog-content"></div>
|
55
|
+
<div class="text-right mt-4">
|
56
|
+
<button id="close-full-preview" class="px-4 py-2 bg-indigo-600 rounded text-white">Close</button>
|
57
|
+
</div>
|
58
|
+
</div>
|
59
|
+
</dialog>
|
39
60
|
</div>
|
40
61
|
<div class="flex items-center justify-end space-x-4">
|
41
62
|
<%= link_to "Cancel", workbench_index_path, class: "px-3 py-2 rounded-md text-sm font-medium text-gray-300 hover:bg-gray-800 hover:text-white transition-colors duration-150 ease-in-out" %>
|
@@ -76,6 +97,56 @@
|
|
76
97
|
}
|
77
98
|
})
|
78
99
|
})()
|
100
|
+
|
101
|
+
// Prompt preview scrollbar and dialog functionality
|
102
|
+
document.addEventListener('DOMContentLoaded', () => {
|
103
|
+
const wrapper = document.getElementById('prompt-preview');
|
104
|
+
const preview = document.getElementById('preview-content');
|
105
|
+
const showBtn = document.getElementById('show-full-preview');
|
106
|
+
const dialog = document.getElementById('full-preview-dialog');
|
107
|
+
const dialogBody = document.getElementById('dialog-content');
|
108
|
+
const closeBtn = document.getElementById('close-full-preview');
|
109
|
+
|
110
|
+
if (!preview) return;
|
111
|
+
|
112
|
+
// Check if the preview content is already populated by Stimulus
|
113
|
+
const checkPreviewContent = () => {
|
114
|
+
if (preview.textContent.trim().length > 0) {
|
115
|
+
// Detect overflow
|
116
|
+
if (preview.scrollHeight > preview.clientHeight || preview.scrollWidth > preview.clientWidth) {
|
117
|
+
preview.style.maxHeight = '12em';
|
118
|
+
preview.style.overflow = 'auto';
|
119
|
+
showBtn.classList.remove('hidden');
|
120
|
+
}
|
121
|
+
} else {
|
122
|
+
// If not populated yet, check again after a short delay
|
123
|
+
setTimeout(checkPreviewContent, 100);
|
124
|
+
}
|
125
|
+
};
|
126
|
+
|
127
|
+
// Start checking once the wrapper is visible
|
128
|
+
const observer = new MutationObserver((mutations) => {
|
129
|
+
mutations.forEach((mutation) => {
|
130
|
+
if (mutation.type === 'attributes' && mutation.attributeName === 'class') {
|
131
|
+
if (!wrapper.classList.contains('hidden')) {
|
132
|
+
checkPreviewContent();
|
133
|
+
observer.disconnect(); // Stop observing once we've detected the change
|
134
|
+
}
|
135
|
+
}
|
136
|
+
});
|
137
|
+
});
|
138
|
+
|
139
|
+
observer.observe(wrapper, { attributes: true });
|
140
|
+
|
141
|
+
// Show full in dialog
|
142
|
+
showBtn.addEventListener('click', () => {
|
143
|
+
dialogBody.textContent = preview.textContent;
|
144
|
+
dialog.showModal();
|
145
|
+
});
|
146
|
+
|
147
|
+
// Close dialog
|
148
|
+
closeBtn.addEventListener('click', () => dialog.close());
|
149
|
+
});
|
79
150
|
</script>
|
80
151
|
<!-- Include marked.js for Markdown parsing -->
|
81
|
-
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
152
|
+
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
data/lib/leva/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: leva
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kieran Klaassen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-04-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|