kaba 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/kaba +11 -21
- data/kaba.gemspec +2 -0
- data/lib/kaba/_DPodfile_ +2 -2
- data/lib/kaba/_DTestfile_ +24 -0
- data/lib/kaba/application.rb +31 -0
- data/lib/kaba/dataset.rb +3 -9
- data/lib/kaba/dataset_source.rb +11 -2
- data/lib/kaba/json.rb +15 -0
- data/lib/kaba/judge.md.erb +46 -0
- data/lib/kaba/judge.rb +23 -0
- data/lib/kaba/prompt.rb +2 -2
- data/lib/kaba/report.html.erb +125 -0
- data/lib/kaba/row.rb +7 -0
- data/lib/kaba/test_runner.rb +109 -0
- data/lib/kaba/validate.rb +3 -3
- data/lib/kaba/version.rb +1 -1
- data/lib/kaba.rb +14 -0
- metadata +38 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 17f06505d496fef06c186773df25fa37d7a2e465fac06682f51a300933e286d5
|
4
|
+
data.tar.gz: caefca3868f25c1cd15e6bf441ae3e595a2be4b5fb7574431c1a57284ca00327
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '09d533d49ac00fd23230ae8ea81f2c189d81b285738142e323b07bb5ea9cbac3cd91ae8f41daa56c37ea1c5297d99573e8dac2c411ec5add299644e8d888ea1e'
|
7
|
+
data.tar.gz: 15751ae8ef5e041b5ac8638626bac0e938fe989a5647af4c59c8a9cdc803e0796889f4353adf0a5fefe0af0430187f82ca21ccdd9dcd0a53c88a3edd8829c45d
|
data/exe/kaba
CHANGED
@@ -1,29 +1,19 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
require "bundler/setup"
|
3
3
|
|
4
|
-
require 'async'
|
5
|
-
require 'faraday'
|
6
|
-
require 'colorize'
|
7
|
-
require 'tty-progressbar'
|
8
|
-
require 'async/http/faraday'
|
9
|
-
|
10
4
|
require 'json'
|
11
|
-
require "kaba"
|
12
|
-
|
13
5
|
require 'dotenv'
|
14
6
|
Dotenv.load
|
15
7
|
|
16
|
-
|
17
|
-
class << self
|
18
|
-
def connection
|
19
|
-
endpoint = ENV["LISA_TYPECHAT_ENDPOINT"] || "https://lisa-typechat.listenai.com"
|
20
|
-
@connection ||= Faraday.new(endpoint) do |faraday|
|
21
|
-
faraday.adapter :async_http, clients: Async::HTTP::Faraday::PersistentClients
|
22
|
-
faraday.request :json
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
8
|
+
require "kaba"
|
27
9
|
|
28
|
-
|
29
|
-
|
10
|
+
if ARGV[0] == 'test'
|
11
|
+
# 运行测试程序
|
12
|
+
load DatasetSource.testfile
|
13
|
+
elsif ARGV[0] == 'version'
|
14
|
+
# 显示版本号
|
15
|
+
puts Kaba::VERSION
|
16
|
+
else
|
17
|
+
# 运行 DPodfile 文件,DPodfile 是一个 Ruby 文件
|
18
|
+
load DatasetSource.podfile
|
19
|
+
end
|
data/kaba.gemspec
CHANGED
@@ -37,6 +37,8 @@ Gem::Specification.new do |spec|
|
|
37
37
|
spec.add_dependency "colorize", "~> 1.1"
|
38
38
|
spec.add_dependency "tty-progressbar", "~> 0.18.3"
|
39
39
|
spec.add_dependency "dotenv", "~> 3.1"
|
40
|
+
spec.add_dependency "ruby-openai", "~> 7.3"
|
41
|
+
spec.add_dependency "json-repair", "~> 0.2.0"
|
40
42
|
|
41
43
|
# For more information and examples about making a new gem, check out our
|
42
44
|
# guide at: https://bundler.io/guides/creating_gem.html
|
data/lib/kaba/_DPodfile_
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
## 使用 Ruby 语言编写的数据集校验脚本
|
2
|
-
# binding.irb 断点调试
|
3
2
|
# 使用 colorize 来输出带颜色的信息,https://github.com/fazibear/colorize
|
4
3
|
# 使用 progressbar 来显示进度条,https://github.com/piotrmurach/tty-progressbar
|
5
|
-
# 设置数据集目录, 如果使用 Docker 方式运行,需要将数据集挂载到 /data 目录下,DatasetSource 会自动加载 /data 目录下的数据集
|
6
4
|
source = DatasetSource.new(File.join(__dir__, 'data'))
|
7
5
|
schema = source.schema.join('resume.ts').read
|
8
6
|
type_name = 'Resume'
|
@@ -24,6 +22,8 @@ validate.run_files(source.row)
|
|
24
22
|
dataset.scan()
|
25
23
|
dataset.save(source.join('train.jsonl'))
|
26
24
|
|
25
|
+
puts "Dataset 校验结果:#{dataset.validate}"
|
26
|
+
|
27
27
|
## 高级玩法,不要轻易尝试
|
28
28
|
#
|
29
29
|
### 可以加入 limit 来限制读取的文件数量,validate.run_files('./data/row', limit: 1) do |response, json, file|
|
data/lib/kaba/_DTestfile_
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
## 使用 Ruby 语言编写的数据集校验脚本
|
2
|
+
# 使用 colorize 来输出带颜色的信息,https://github.com/fazibear/colorize
|
3
|
+
# 使用 progressbar 来显示进度条,https://github.com/piotrmurach/tty-progressbar
|
4
|
+
|
5
|
+
source = DatasetSource.new(File.join(__dir__, 'data'))
|
6
|
+
schema = source.schema.join('resume.ts').read
|
7
|
+
type_name = 'Resume'
|
8
|
+
prompt = Prompt.new(schema, type_name)
|
9
|
+
validate = Validate.new(schema: schema, type_name: type_name)
|
10
|
+
|
11
|
+
|
12
|
+
test_runner = TestRunner.new(
|
13
|
+
source.test,
|
14
|
+
schema: schema,
|
15
|
+
type_name: type_name,
|
16
|
+
prompt: prompt,
|
17
|
+
validate: validate
|
18
|
+
)
|
19
|
+
|
20
|
+
test_runner.scan(
|
21
|
+
limit: 1,
|
22
|
+
)
|
23
|
+
|
24
|
+
test_runner.save(source.join('report.html'))
|
data/lib/kaba/application.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
class Application
|
2
|
+
class << self
|
3
|
+
def connection
|
4
|
+
endpoint = ENV["LISA_TYPECHAT_ENDPOINT"] || "https://lisa-typechat.listenai.com"
|
5
|
+
@connection ||= Faraday.new(endpoint) do |faraday|
|
6
|
+
faraday.adapter :async_http, clients: Async::HTTP::Faraday::PersistentClients
|
7
|
+
faraday.request :json
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
def llm_client
|
12
|
+
@llm_client ||= OpenAI::Client.new(
|
13
|
+
access_token: env!("LISA_ACCESS_TOKEN"),
|
14
|
+
request_timeout: ENV.fetch("LISA_LLM_REQUEST_TIMEOUT", 120).to_i,
|
15
|
+
uri_base: ENV.fetch("LISA_LLM_URI_BASE", "https://api.listenai.com")
|
16
|
+
) do |faraday|
|
17
|
+
faraday.adapter :async_http, clients: Async::HTTP::Faraday::PersistentClients
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def llm_client_extra_headers=(headers)
|
22
|
+
OpenAI.configure do |config|
|
23
|
+
config.extra_headers = headers
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def env!(name)
|
28
|
+
ENV[name] or raise "missing environment variable: #{name}"
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
data/lib/kaba/dataset.rb
CHANGED
@@ -36,7 +36,9 @@ class Dataset
|
|
36
36
|
end
|
37
37
|
|
38
38
|
def scan(limit: nil)
|
39
|
-
progressbar = TTY::ProgressBar.new(
|
39
|
+
progressbar = TTY::ProgressBar.new(
|
40
|
+
"Dataset: [:bar] :percent :current/:total",
|
41
|
+
total: @data_files.first(limit || @data_files.size).size)
|
40
42
|
Async do
|
41
43
|
_each(limit: limit) do |row, ds|
|
42
44
|
Async do
|
@@ -57,12 +59,4 @@ class Dataset
|
|
57
59
|
end.wait
|
58
60
|
end
|
59
61
|
|
60
|
-
end
|
61
|
-
|
62
|
-
class Row
|
63
|
-
attr_reader :target_path, :input_file
|
64
|
-
def initialize(file)
|
65
|
-
@target_path = File.expand_path(file)
|
66
|
-
@input_file = @target_path.sub(/\.target\.json$/, '.input.txt')
|
67
|
-
end
|
68
62
|
end
|
data/lib/kaba/dataset_source.rb
CHANGED
@@ -5,7 +5,7 @@ class DatasetSource
|
|
5
5
|
@path = path
|
6
6
|
end
|
7
7
|
|
8
|
-
[:row, :schema].each do |method_name|
|
8
|
+
[:row, :schema, :test].each do |method_name|
|
9
9
|
define_method(method_name) do
|
10
10
|
self.class.new(File.join(@path, method_name.to_s))
|
11
11
|
end
|
@@ -29,13 +29,22 @@ class DatasetSource
|
|
29
29
|
|
30
30
|
class << self
|
31
31
|
def podfile
|
32
|
-
d_podfile_path = File.join(Dir.pwd, 'DPodfile')
|
32
|
+
d_podfile_path = File.join(Dir.pwd, 'DPodfile.rb')
|
33
33
|
unless File.exist?(d_podfile_path)
|
34
34
|
FileUtils.cp(File.join(__dir__, '_DPodfile_'), d_podfile_path)
|
35
35
|
end
|
36
36
|
d_podfile_path
|
37
37
|
end
|
38
|
+
|
39
|
+
def testfile
|
40
|
+
d_testfile_path = File.join(Dir.pwd, 'DTestfile.rb')
|
41
|
+
unless File.exist?(d_testfile_path)
|
42
|
+
FileUtils.cp(File.join(__dir__, '_DTestfile_'), d_testfile_path)
|
43
|
+
end
|
44
|
+
d_testfile_path
|
45
|
+
end
|
38
46
|
end
|
39
47
|
|
48
|
+
|
40
49
|
end
|
41
50
|
|
data/lib/kaba/json.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module JSON
|
2
|
+
|
3
|
+
def self.parse_llm_response(response_text)
|
4
|
+
start_index = response_text.index('{')
|
5
|
+
end_index = response_text.rindex('}')
|
6
|
+
|
7
|
+
unless start_index && end_index && end_index > start_index
|
8
|
+
raise "Invalid JSON response: #{response_text}"
|
9
|
+
end
|
10
|
+
|
11
|
+
json_text = response_text[start_index..end_index]
|
12
|
+
JSON.parse JSON.repair(json_text)
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
data/lib/kaba/judge.md.erb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
【系统】
|
2
|
+
请作为一个公正的裁判,评估下面给定用户问题的AI助手所提供回答的质量。您的评估应该考虑以下因素:
|
3
|
+
* 理解:仅考虑回答的扣题程度,不考虑回答的正确性。
|
4
|
+
* 核心需求是否理解;
|
5
|
+
* 非核心需求是否理解;
|
6
|
+
* 生成:考虑(1)回答和问题的相关性、(2)生成文本的质量。
|
7
|
+
* 核心需求是否体现在答案里;
|
8
|
+
* 核心需求体现在答案,但是否正确实现。
|
9
|
+
* 逻辑:考虑回答的逻辑正确性与一致性
|
10
|
+
* 创作/问答的逻辑主要指的是行文逻辑、发展逻辑、论证逻辑等;
|
11
|
+
* 信息处理/代码/数学计算/逻辑推理的逻辑包括推理/计算步骤与答案正确性;
|
12
|
+
* 事实:前提是符合中国的国情和政治立场、法律法规和文化价值观要准确,主要指回答问题涉及的外部客观事实正确性,回复提供的信息要准确、真实、可靠、有帮助。
|
13
|
+
* 指令遵循:回答是否严格遵循用户问题的要求,比如是否提供了所有要求的信息,要按照给定样例格式输出回答,遇到选择或分类题应当直接输出答案而不用补充说明。
|
14
|
+
请帮助我评估AI助手回答的好坏并给出对应的0到10得分,最终只需要给出一个综合的得分。
|
15
|
+
【用户的问题】
|
16
|
+
|
17
|
+
{
|
18
|
+
"input": "<%= @input %>",
|
19
|
+
}
|
20
|
+
|
21
|
+
【参考的回答】
|
22
|
+
|
23
|
+
[
|
24
|
+
{
|
25
|
+
"target": "<%= @target %>"
|
26
|
+
}
|
27
|
+
]
|
28
|
+
|
29
|
+
【助手的回答】
|
30
|
+
|
31
|
+
[
|
32
|
+
{
|
33
|
+
"output": "<%= @output %>"
|
34
|
+
}
|
35
|
+
]
|
36
|
+
|
37
|
+
【输出格式】
|
38
|
+
|
39
|
+
{
|
40
|
+
"reason": "",
|
41
|
+
"score": ""
|
42
|
+
}
|
43
|
+
|
44
|
+
请注意区分您的最终任务和用户问题中提出的任务,最终的任务是完成评估打分任务,而不要直接回答给定的用户问题。
|
45
|
+
请按照输出格式给出评分理由和助手回答的得分,不要输出json格式外的内容。
|
46
|
+
【评估结果】
|
data/lib/kaba/judge.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'erb'
|
2
|
+
|
3
|
+
class Judge
|
4
|
+
def initialize(input: , target: , output:)
|
5
|
+
@input = input
|
6
|
+
@target = target
|
7
|
+
@output = output
|
8
|
+
end
|
9
|
+
|
10
|
+
def render
|
11
|
+
ERB.new(File.read(self.class.prompt_path)).result(binding)
|
12
|
+
end
|
13
|
+
|
14
|
+
class << self
|
15
|
+
def prompt_path
|
16
|
+
@prompt_path || File.join(__dir__, 'judge.md.erb')
|
17
|
+
end
|
18
|
+
|
19
|
+
def set_prompt_path(path)
|
20
|
+
@prompt_path = path
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/lib/kaba/prompt.rb
CHANGED
@@ -13,9 +13,9 @@ class Prompt
|
|
13
13
|
request_body = {
|
14
14
|
schema: schema,
|
15
15
|
typeName: @type_name,
|
16
|
-
|
16
|
+
input: input
|
17
17
|
}
|
18
|
-
Application.connection.post('/prompt', request_body).body
|
18
|
+
resp = Application.connection.post('/prompt', request_body).body
|
19
19
|
end
|
20
20
|
|
21
21
|
class << self
|
data/lib/kaba/report.html.erb
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html lang="en">
|
3
|
+
<head>
|
4
|
+
<meta charset="UTF-8">
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6
|
+
<title>Report</title>
|
7
|
+
<style>
|
8
|
+
body {
|
9
|
+
font-family: Arial, sans-serif;
|
10
|
+
margin: 20px;
|
11
|
+
line-height: 1.6;
|
12
|
+
}
|
13
|
+
.header {
|
14
|
+
margin-bottom: 20px;
|
15
|
+
}
|
16
|
+
.header h1 {
|
17
|
+
font-size: 24px;
|
18
|
+
}
|
19
|
+
.metrics {
|
20
|
+
font-size: 18px;
|
21
|
+
margin-bottom: 10px;
|
22
|
+
}
|
23
|
+
.line-item {
|
24
|
+
margin-bottom: 20px;
|
25
|
+
border: 1px solid #ddd;
|
26
|
+
border-radius: 5px;
|
27
|
+
background-color: #f9f9f9;
|
28
|
+
}
|
29
|
+
.line-item h2 {
|
30
|
+
font-size: 18px;
|
31
|
+
margin: 0;
|
32
|
+
padding: 10px;
|
33
|
+
background-color: #f0f0f0;
|
34
|
+
cursor: pointer;
|
35
|
+
}
|
36
|
+
.line-item h2 .status {
|
37
|
+
font-size: 14px;
|
38
|
+
color: #666;
|
39
|
+
margin-left: 10px;
|
40
|
+
}
|
41
|
+
.line-item .content {
|
42
|
+
display: none;
|
43
|
+
padding: 10px;
|
44
|
+
}
|
45
|
+
.line-item pre {
|
46
|
+
background-color: #282c34;
|
47
|
+
color: #abb2bf;
|
48
|
+
padding: 10px;
|
49
|
+
overflow-x: auto;
|
50
|
+
border-radius: 5px;
|
51
|
+
}
|
52
|
+
</style>
|
53
|
+
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/styles/default.min.css">
|
54
|
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/highlight.min.js"></script>
|
55
|
+
<script>
|
56
|
+
document.addEventListener("DOMContentLoaded", () => {
|
57
|
+
// Initialize Highlight.js
|
58
|
+
hljs.highlightAll();
|
59
|
+
|
60
|
+
// Add toggle functionality
|
61
|
+
document.querySelectorAll(".line-item h2").forEach(header => {
|
62
|
+
header.addEventListener("click", () => {
|
63
|
+
const content = header.nextElementSibling;
|
64
|
+
content.style.display = content.style.display === "none" ? "block" : "none";
|
65
|
+
});
|
66
|
+
});
|
67
|
+
});
|
68
|
+
</script>
|
69
|
+
</head>
|
70
|
+
<body>
|
71
|
+
<div class="header">
|
72
|
+
<h1>Test Report</h1>
|
73
|
+
<div class="metrics">
|
74
|
+
<p><strong>测试总数: </strong><%= @lines.size %></p>
|
75
|
+
<p><strong>类型测试通过: </strong><%= @type_right_total %></p>
|
76
|
+
<p><strong>平均分: </strong><%= (@score_total.to_f / @lines.size).round(2) %></p>
|
77
|
+
</div>
|
78
|
+
</div>
|
79
|
+
|
80
|
+
<div class="content">
|
81
|
+
<% @lines.each_with_index do |line, index| %>
|
82
|
+
<div class="line-item">
|
83
|
+
<h2>
|
84
|
+
测试路径: <%= line[:row].input_file %>
|
85
|
+
<span class="status">
|
86
|
+
<%= line[:type_check_response]["success"] ? "✅ 定义检查" : "❌ 定义检查" %> | 得分: <%= line[:judge_json]["score"] %>
|
87
|
+
</span>
|
88
|
+
</h2>
|
89
|
+
|
90
|
+
<div class="content">
|
91
|
+
<div class="section">
|
92
|
+
<strong>AI裁判输出:</strong>
|
93
|
+
<pre><code class="language-json"><%= JSON.pretty_generate line[:judge_json] %></code></pre>
|
94
|
+
</div>
|
95
|
+
|
96
|
+
<div class="section">
|
97
|
+
<strong>类型检查:</strong>
|
98
|
+
<pre><code class="language-json"><%= JSON.pretty_generate(line[:type_check_response]) %></code></pre>
|
99
|
+
</div>
|
100
|
+
|
101
|
+
<div class="section">
|
102
|
+
<strong>Prompt:</strong>
|
103
|
+
<pre><code class="language-markdown"><%= line[:input] %></code></pre>
|
104
|
+
</div>
|
105
|
+
|
106
|
+
<div class="section">
|
107
|
+
<strong>原始输出:</strong>
|
108
|
+
<pre><code class="language-markdown"><%= line[:output] %></code></pre>
|
109
|
+
</div>
|
110
|
+
|
111
|
+
<div class="section">
|
112
|
+
<strong>格式化输出:</strong>
|
113
|
+
<pre><code class="language-json"><%= JSON.pretty_generate(line[:output_json]) %></code></pre>
|
114
|
+
</div>
|
115
|
+
|
116
|
+
<div class="section">
|
117
|
+
<strong>目标结果:</strong>
|
118
|
+
<pre><code class="language-json"><%= line[:target] %></code></pre>
|
119
|
+
</div>
|
120
|
+
</div>
|
121
|
+
</div>
|
122
|
+
<% end %>
|
123
|
+
</div>
|
124
|
+
</body>
|
125
|
+
</html>
|
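data/lib/kaba/row.rb
ADDED
@@ -0,0 +1,7 @@
(the 7 added lines of row.rb are missing from this rendering)
data/lib/kaba/test_runner.rb
ADDED
@@ -0,0 +1,109 @@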
|
|
1
|
+
require 'erb'
|
2
|
+
|
3
|
+
class TestRunner
|
4
|
+
def initialize(path, schema:, type_name:, prompt:, validate:)
|
5
|
+
|
6
|
+
@test_files = Dir.glob(File.join(File.expand_path(path), '*.target.json'))
|
7
|
+
@lines = []
|
8
|
+
@schema = schema
|
9
|
+
@type_name = type_name
|
10
|
+
@prompt = prompt || Prompt.new(@schema, @type_name)
|
11
|
+
@validate = validate || Validate.new(schema: @schema, type_name: @type_name)
|
12
|
+
|
13
|
+
@type_right_total = 0
|
14
|
+
@score_total = 0
|
15
|
+
|
16
|
+
end
|
17
|
+
|
18
|
+
def _each(limit: nil)
|
19
|
+
@test_files.first(limit || @test_files.size).each do |file|
|
20
|
+
yield(Row.new(file), self)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def scan(
|
25
|
+
limit: nil,
|
26
|
+
model: 'spark-general-4.0',
|
27
|
+
judge_model: 'spark-general-4.0',
|
28
|
+
judge_temperature: 0.1,
|
29
|
+
temperature: 0.1
|
30
|
+
)
|
31
|
+
|
32
|
+
progressbar = TTY::ProgressBar.new(
|
33
|
+
"Test: [:bar] :percent :current/:total",
|
34
|
+
total: @test_files.first(limit || @test_files.size).size
|
35
|
+
)
|
36
|
+
|
37
|
+
progressbar.start
|
38
|
+
|
39
|
+
Async do
|
40
|
+
_each(limit: limit) do |row|
|
41
|
+
Async do |task|
|
42
|
+
input = @prompt.render(File.read row.input_file)
|
43
|
+
|
44
|
+
target = <<~Markdown
|
45
|
+
```json
|
46
|
+
#{JSON.pretty_generate(JSON.parse(File.read(row.target_path)))}
|
47
|
+
```
|
48
|
+
Markdown
|
49
|
+
output = Application.llm_client.chat(
|
50
|
+
parameters: {
|
51
|
+
model: model,
|
52
|
+
messages: [ { role: 'user', content: input } ],
|
53
|
+
temperature: temperature,
|
54
|
+
}
|
55
|
+
).dig("choices", 0, "message", "content")
|
56
|
+
|
57
|
+
|
58
|
+
output_json = JSON.parse_llm_response output
|
59
|
+
|
60
|
+
type_check_response = JSON.parse @validate.run(output_json).body
|
61
|
+
@type_right_total += 1 if type_check_response["success"]
|
62
|
+
|
63
|
+
judge_input = Judge.new(input: input, output: output, target: target).render
|
64
|
+
judge_response = Application.llm_client.chat(
|
65
|
+
parameters: {
|
66
|
+
model: judge_model,
|
67
|
+
messages: [ { role: 'user', content: judge_input } ],
|
68
|
+
temperature: judge_temperature,
|
69
|
+
}
|
70
|
+
).dig("choices", 0, "message", "content")
|
71
|
+
|
72
|
+
judge_json = JSON.parse_llm_response judge_response
|
73
|
+
@score_total += judge_json["score"].to_i
|
74
|
+
|
75
|
+
@lines << {
|
76
|
+
row: row,
|
77
|
+
input: input,
|
78
|
+
output: output,
|
79
|
+
output_json: output_json,
|
80
|
+
type_check_response: type_check_response,
|
81
|
+
target: target,
|
82
|
+
judge_response: judge_response,
|
83
|
+
judge_json: judge_json,
|
84
|
+
}
|
85
|
+
|
86
|
+
progressbar.advance
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end.wait
|
90
|
+
end
|
91
|
+
|
92
|
+
def save(file_path)
|
93
|
+
File.open(File.expand_path(file_path), 'w') do |file|
|
94
|
+
file.puts ERB.new(File.read(self.class.report_template_path)).result(binding)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
|
99
|
+
class << self
|
100
|
+
def report_template_path
|
101
|
+
@report_template_path || File.join(__dir__, 'report.html.erb')
|
102
|
+
end
|
103
|
+
|
104
|
+
def report_template_path=(path)
|
105
|
+
@report_template_path = path
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
end
|
data/lib/kaba/validate.rb
CHANGED
@@ -18,7 +18,7 @@ class Validate
|
|
18
18
|
# 读取某个文件然后运行
|
19
19
|
def run_file(file)
|
20
20
|
input = JSON.parse File.read(File.expand_path file)
|
21
|
-
ValidateReponse.new run(input)
|
21
|
+
ValidateReponse.new run(input), file: file
|
22
22
|
end
|
23
23
|
|
24
24
|
# 读取某个文件夹下的然后运行,运行有结果了 block 会被调用
|
@@ -75,8 +75,8 @@ class Validate
|
|
75
75
|
|
76
76
|
def to_s
|
77
77
|
s = "#{'success:'.colorize(:bold_blue)} #{success? ? 'true'.colorize(:green) : 'false'.colorize(:red)}"
|
78
|
-
s += "\n#{'file:'.colorize(:bold_blue)} #{file
|
79
|
-
s += "\n#{'message:'.colorize(:bold_blue)} #{message
|
78
|
+
s += "\n#{'file:'.colorize(:bold_blue)} #{file&.to_s&.colorize(:yellow)}"
|
79
|
+
s += "\n#{'message:'.colorize(:bold_blue)} #{message&.colorize(:yellow)}" unless success?
|
80
80
|
s += "\n\n"
|
81
81
|
end
|
82
82
|
end
|
data/lib/kaba/version.rb
CHANGED
data/lib/kaba.rb
CHANGED
@@ -1,10 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'async'
|
4
|
+
require 'faraday'
|
5
|
+
require 'colorize'
|
6
|
+
require 'tty-progressbar'
|
7
|
+
require 'async/http/faraday'
|
8
|
+
require 'openai'
|
9
|
+
require 'json/repair'
|
10
|
+
|
11
|
+
require_relative "kaba/application"
|
12
|
+
require_relative "kaba/json"
|
13
|
+
|
14
|
+
require_relative "kaba/row"
|
3
15
|
require_relative "kaba/version"
|
4
16
|
require_relative "kaba/dataset"
|
5
17
|
require_relative "kaba/dataset_source"
|
6
18
|
require_relative "kaba/prompt"
|
7
19
|
require_relative "kaba/validate"
|
20
|
+
require_relative "kaba/judge"
|
21
|
+
require_relative "kaba/test_runner"
|
8
22
|
|
9
23
|
module Kaba
|
10
24
|
class Error < StandardError; end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kaba
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MJ
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-11-
|
11
|
+
date: 2024-11-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: async
|
@@ -94,6 +94,34 @@ dependencies:
|
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '3.1'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: ruby-openai
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '7.3'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '7.3'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: json-repair
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 0.2.0
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 0.2.0
|
97
125
|
description: 用来做数据集的工具
|
98
126
|
email:
|
99
127
|
- tywf91@gmail.com
|
@@ -110,9 +138,17 @@ files:
|
|
110
138
|
- kaba.gemspec
|
111
139
|
- lib/kaba.rb
|
112
140
|
- lib/kaba/_DPodfile_
|
141
|
+
- lib/kaba/_DTestfile_
|
142
|
+
- lib/kaba/application.rb
|
113
143
|
- lib/kaba/dataset.rb
|
114
144
|
- lib/kaba/dataset_source.rb
|
145
|
+
- lib/kaba/json.rb
|
146
|
+
- lib/kaba/judge.md.erb
|
147
|
+
- lib/kaba/judge.rb
|
115
148
|
- lib/kaba/prompt.rb
|
149
|
+
- lib/kaba/report.html.erb
|
150
|
+
- lib/kaba/row.rb
|
151
|
+
- lib/kaba/test_runner.rb
|
116
152
|
- lib/kaba/validate.rb
|
117
153
|
- lib/kaba/version.rb
|
118
154
|
- sig/kaba.rbs
|