kaba 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0819a2ca5b94fd6ce32a48fc973dd86d0c2234a073bf32ee9327e02f572b45b2'
4
- data.tar.gz: 203c7c8cfcc4d1d059c01b01467f8b897580c4422d25db85335a2b119e674401
3
+ metadata.gz: 17f06505d496fef06c186773df25fa37d7a2e465fac06682f51a300933e286d5
4
+ data.tar.gz: caefca3868f25c1cd15e6bf441ae3e595a2be4b5fb7574431c1a57284ca00327
5
5
  SHA512:
6
- metadata.gz: 8abc31a51664418d2c8df17d2ff370c71b3124df3eb4cd17b9f05d3162e75b7fe76945986e47ae6415d0d43147be83c9627a41e9b9f7181623dcb752e91a0873
7
- data.tar.gz: cc4078a6d7b4205d4403a4451ec95d4020c354809332788c755f0d2a2f6381f3d36d5206f96f7b04312a7ddf72348c2fadcf8ee519e63be4bcd730a76761b3bd
6
+ metadata.gz: '09d533d49ac00fd23230ae8ea81f2c189d81b285738142e323b07bb5ea9cbac3cd91ae8f41daa56c37ea1c5297d99573e8dac2c411ec5add299644e8d888ea1e'
7
+ data.tar.gz: 15751ae8ef5e041b5ac8638626bac0e938fe989a5647af4c59c8a9cdc803e0796889f4353adf0a5fefe0af0430187f82ca21ccdd9dcd0a53c88a3edd8829c45d
data/exe/kaba CHANGED
@@ -2,22 +2,18 @@
2
2
  require "bundler/setup"
3
3
 
4
4
  require 'json'
5
- require "kaba"
6
-
7
5
  require 'dotenv'
8
6
  Dotenv.load
9
7
 
10
- class Application
11
- class << self
12
- def connection
13
- endpoint = ENV["LISA_TYPECHAT_ENDPOINT"] || "https://lisa-typechat.listenai.com"
14
- @connection ||= Faraday.new(endpoint) do |faraday|
15
- faraday.adapter :async_http, clients: Async::HTTP::Faraday::PersistentClients
16
- faraday.request :json
17
- end
18
- end
19
- end
20
- end
8
+ require "kaba"
21
9
 
22
- # 运行 DPodfile 文件,DPodfile 是一个 Ruby 文件
23
- load DatasetSource.podfile
10
+ if ARGV[0] == 'test'
11
+ # 运行测试程序
12
+ load DatasetSource.testfile
13
+ elsif ARGV[0] == 'version'
14
+ # 显示版本号
15
+ puts Kaba::VERSION
16
+ else
17
+ # 运行 DPodfile 文件,DPodfile 是一个 Ruby 文件
18
+ load DatasetSource.podfile
19
+ end
data/kaba.gemspec CHANGED
@@ -37,6 +37,8 @@ Gem::Specification.new do |spec|
37
37
  spec.add_dependency "colorize", "~> 1.1"
38
38
  spec.add_dependency "tty-progressbar", "~> 0.18.3"
39
39
  spec.add_dependency "dotenv", "~> 3.1"
40
+ spec.add_dependency "ruby-openai", "~> 7.3"
41
+ spec.add_dependency "json-repair", "~> 0.2.0"
40
42
 
41
43
  # For more information and examples about making a new gem, check out our
42
44
  # guide at: https://bundler.io/guides/creating_gem.html
data/lib/kaba/_DPodfile_ CHANGED
@@ -1,8 +1,6 @@
1
1
  ## 使用 Ruby 语言编写的数据集校验脚本
2
- # binding.irb 断点调试
3
2
  # 使用 colorize 来输出带颜色的信息,https://github.com/fazibear/colorize
4
3
  # 使用 progressbar 来显示进度条,https://github.com/piotrmurach/tty-progressbar
5
- # 设置数据集目录, 如果使用 Docker 方式运行,需要将数据集挂载到 /data 目录下,DatasetSource 会自动加载 /data 目录下的数据集
6
4
  source = DatasetSource.new(File.join(__dir__, 'data'))
7
5
  schema = source.schema.join('resume.ts').read
8
6
  type_name = 'Resume'
@@ -0,0 +1,24 @@
1
+ ## 使用 Ruby 语言编写的数据集校验脚本
2
+ # 使用 colorize 来输出带颜色的信息,https://github.com/fazibear/colorize
3
+ # 使用 progressbar 来显示进度条,https://github.com/piotrmurach/tty-progressbar
4
+
5
+ source = DatasetSource.new(File.join(__dir__, 'data'))
6
+ schema = source.schema.join('resume.ts').read
7
+ type_name = 'Resume'
8
+ prompt = Prompt.new(schema, type_name)
9
+ validate = Validate.new(schema: schema, type_name: type_name)
10
+
11
+
12
+ test_runner = TestRunner.new(
13
+ source.test,
14
+ schema: schema,
15
+ type_name: type_name,
16
+ prompt: prompt,
17
+ validate: validate
18
+ )
19
+
20
+ test_runner.scan(
21
+ limit: 1,
22
+ )
23
+
24
+ test_runner.save(source.join('report.html'))
@@ -0,0 +1,31 @@
1
+ class Application
2
+ class << self
3
+ def connection
4
+ endpoint = ENV["LISA_TYPECHAT_ENDPOINT"] || "https://lisa-typechat.listenai.com"
5
+ @connection ||= Faraday.new(endpoint) do |faraday|
6
+ faraday.adapter :async_http, clients: Async::HTTP::Faraday::PersistentClients
7
+ faraday.request :json
8
+ end
9
+ end
10
+
11
+ def llm_client
12
+ @llm_client ||= OpenAI::Client.new(
13
+ access_token: env!("LISA_ACCESS_TOKEN"),
14
+ request_timeout: ENV.fetch("LISA_LLM_REQUEST_TIMEOUT", 120).to_i,
15
+ uri_base: ENV.fetch("LISA_LLM_URI_BASE", "https://api.listenai.com")
16
+ ) do |faraday|
17
+ faraday.adapter :async_http, clients: Async::HTTP::Faraday::PersistentClients
18
+ end
19
+ end
20
+
21
+ def llm_client_extra_headers=(headers)
22
+ OpenAI.configure do |config|
23
+ config.extra_headers = headers
24
+ end
25
+ end
26
+
27
+ def env!(name)
28
+ ENV[name] or raise "missing environment variable: #{name}"
29
+ end
30
+ end
31
+ end
data/lib/kaba/dataset.rb CHANGED
@@ -36,7 +36,9 @@ class Dataset
36
36
  end
37
37
 
38
38
  def scan(limit: nil)
39
- progressbar = TTY::ProgressBar.new("Dataset: [:bar] :percent :current/:total", total: @data_files.size)
39
+ progressbar = TTY::ProgressBar.new(
40
+ "Dataset: [:bar] :percent :current/:total",
41
+ total: @data_files.first(limit || @data_files.size).size)
40
42
  Async do
41
43
  _each(limit: limit) do |row, ds|
42
44
  Async do
@@ -57,12 +59,4 @@ class Dataset
57
59
  end.wait
58
60
  end
59
61
 
60
- end
61
-
62
- class Row
63
- attr_reader :target_path, :input_file
64
- def initialize(file)
65
- @target_path = File.expand_path(file)
66
- @input_file = @target_path.sub(/\.target\.json$/, '.input.txt')
67
- end
68
62
  end
@@ -5,7 +5,7 @@ class DatasetSource
5
5
  @path = path
6
6
  end
7
7
 
8
- [:row, :schema].each do |method_name|
8
+ [:row, :schema, :test].each do |method_name|
9
9
  define_method(method_name) do
10
10
  self.class.new(File.join(@path, method_name.to_s))
11
11
  end
@@ -29,13 +29,22 @@ class DatasetSource
29
29
 
30
30
  class << self
31
31
  def podfile
32
- d_podfile_path = File.join(Dir.pwd, 'DPodfile')
32
+ d_podfile_path = File.join(Dir.pwd, 'DPodfile.rb')
33
33
  unless File.exist?(d_podfile_path)
34
34
  FileUtils.cp(File.join(__dir__, '_DPodfile_'), d_podfile_path)
35
35
  end
36
36
  d_podfile_path
37
37
  end
38
+
39
+ def testfile
40
+ d_testfile_path = File.join(Dir.pwd, 'DTestfile.rb')
41
+ unless File.exist?(d_testfile_path)
42
+ FileUtils.cp(File.join(__dir__, '_DTestfile_'), d_testfile_path)
43
+ end
44
+ d_testfile_path
45
+ end
38
46
  end
39
47
 
48
+
40
49
  end
41
50
 
data/lib/kaba/json.rb ADDED
@@ -0,0 +1,15 @@
1
+ module JSON
2
+
3
+ def self.parse_llm_response(response_text)
4
+ start_index = response_text.index('{')
5
+ end_index = response_text.rindex('}')
6
+
7
+ unless start_index && end_index && end_index > start_index
8
+ raise "Invalid JSON response: #{response_text}"
9
+ end
10
+
11
+ json_text = response_text[start_index..end_index]
12
+ JSON.parse JSON.repair(json_text)
13
+ end
14
+
15
+ end
@@ -0,0 +1,46 @@
1
+ 【系统】
2
+ 请作为一个公正的裁判,评估下面给定用户问题的AI助手所提供回答的质量。您的评估应该考虑以下因素:
3
+ * 理解:仅考虑回答的扣题程度,不考虑回答的正确性。
4
+ * 核心需求是否理解;
5
+ * 非核心需求是否理解;
6
+ * 生成:考虑(1)回答和问题的相关性、(2)生成文本的质量。
7
+ * 核心需求是否体现在答案里;
8
+ * 核心需求体现在答案,但是否正确实现。
9
+ * 逻辑:考虑回答的逻辑正确性与一致性
10
+ * 创作/问答的逻辑主要指的是行文逻辑、发展逻辑、论证逻辑等;
11
+ * 信息处理/代码/数学计算/逻辑推理的逻辑包括推理/计算步骤与答案正确性;
12
+ * 事实:前提是符合中国的国情和政治立场、法律法规和文化价值观要准确,主要指回答问题涉及的外部客观事实正确性,回复提供的信息要准确、真实、可靠、有帮助。
13
+ * 指令遵循:回答是否严格遵循用户问题的要求,比如是否提供了所有要求的信息,要按照给定样例格式输出回答,遇到选择或分类题应当直接输出答案而不用补充说明。
14
+ 请帮助我评估AI助手回答的好坏并给出对应的0到10得分,最终只需要给出一个综合的得分。
15
+ 【用户的问题】
16
+
17
+ {
18
+ "input": "<%= @input %>",
19
+ }
20
+
21
+ 【参考的回答】
22
+
23
+ [
24
+ {
25
+ "target": "<%= @target %>"
26
+ }
27
+ ]
28
+
29
+ 【助手的回答】
30
+
31
+ [
32
+ {
33
+ "output": "<%= @output %>"
34
+ }
35
+ ]
36
+
37
+ 【输出格式】
38
+
39
+ {
40
+ "reason": "",
41
+ "score": ""
42
+ }
43
+
44
+ 请注意区分您的最终任务和用户问题中提出的任务,最终的任务是完成评估打分任务,而不要直接回答给定的用户问题。
45
+ 请按照输出格式给出评分理由和助手回答的得分,不要输出json格式外的内容。
46
+ 【评估结果】
data/lib/kaba/judge.rb ADDED
@@ -0,0 +1,23 @@
1
+ require 'erb'
2
+
3
+ class Judge
4
+ def initialize(input: , target: , output:)
5
+ @input = input
6
+ @target = target
7
+ @output = output
8
+ end
9
+
10
+ def render
11
+ ERB.new(File.read(self.class.prompt_path)).result(binding)
12
+ end
13
+
14
+ class << self
15
+ def prompt_path
16
+ @prompt_path || File.join(__dir__, 'judge.md.erb')
17
+ end
18
+
19
+ def set_prompt_path(path)
20
+ @prompt_path = path
21
+ end
22
+ end
23
+ end
data/lib/kaba/prompt.rb CHANGED
@@ -13,9 +13,9 @@ class Prompt
13
13
  request_body = {
14
14
  schema: schema,
15
15
  typeName: @type_name,
16
- inpu: input
16
+ input: input
17
17
  }
18
- Application.connection.post('/prompt', request_body).body
18
+ resp = Application.connection.post('/prompt', request_body).body
19
19
  end
20
20
 
21
21
  class << self
@@ -0,0 +1,125 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Report</title>
7
+ <style>
8
+ body {
9
+ font-family: Arial, sans-serif;
10
+ margin: 20px;
11
+ line-height: 1.6;
12
+ }
13
+ .header {
14
+ margin-bottom: 20px;
15
+ }
16
+ .header h1 {
17
+ font-size: 24px;
18
+ }
19
+ .metrics {
20
+ font-size: 18px;
21
+ margin-bottom: 10px;
22
+ }
23
+ .line-item {
24
+ margin-bottom: 20px;
25
+ border: 1px solid #ddd;
26
+ border-radius: 5px;
27
+ background-color: #f9f9f9;
28
+ }
29
+ .line-item h2 {
30
+ font-size: 18px;
31
+ margin: 0;
32
+ padding: 10px;
33
+ background-color: #f0f0f0;
34
+ cursor: pointer;
35
+ }
36
+ .line-item h2 .status {
37
+ font-size: 14px;
38
+ color: #666;
39
+ margin-left: 10px;
40
+ }
41
+ .line-item .content {
42
+ display: none;
43
+ padding: 10px;
44
+ }
45
+ .line-item pre {
46
+ background-color: #282c34;
47
+ color: #abb2bf;
48
+ padding: 10px;
49
+ overflow-x: auto;
50
+ border-radius: 5px;
51
+ }
52
+ </style>
53
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/styles/default.min.css">
54
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/highlight.min.js"></script>
55
+ <script>
56
+ document.addEventListener("DOMContentLoaded", () => {
57
+ // Initialize Highlight.js
58
+ hljs.highlightAll();
59
+
60
+ // Add toggle functionality
61
+ document.querySelectorAll(".line-item h2").forEach(header => {
62
+ header.addEventListener("click", () => {
63
+ const content = header.nextElementSibling;
64
+ content.style.display = content.style.display === "none" ? "block" : "none";
65
+ });
66
+ });
67
+ });
68
+ </script>
69
+ </head>
70
+ <body>
71
+ <div class="header">
72
+ <h1>Test Report</h1>
73
+ <div class="metrics">
74
+ <p><strong>测试总数: </strong><%= @lines.size %></p>
75
+ <p><strong>类型测试通过: </strong><%= @type_right_total %></p>
76
+ <p><strong>平均分: </strong><%= (@score_total.to_f / @lines.size).round(2) %></p>
77
+ </div>
78
+ </div>
79
+
80
+ <div class="content">
81
+ <% @lines.each_with_index do |line, index| %>
82
+ <div class="line-item">
83
+ <h2>
84
+ 测试路径: <%= line[:row].input_file %>
85
+ <span class="status">
86
+ <%= line[:type_check_response]["success"] ? "✅ 定义检查" : "❌ 定义检查" %> | 得分: <%= line[:judge_json]["score"] %>
87
+ </span>
88
+ </h2>
89
+
90
+ <div class="content">
91
+ <div class="section">
92
+ <strong>AI裁判输出:</strong>
93
+ <pre><code class="language-json"><%= JSON.pretty_generate line[:judge_json] %></code></pre>
94
+ </div>
95
+
96
+ <div class="section">
97
+ <strong>类型检查:</strong>
98
+ <pre><code class="language-json"><%= JSON.pretty_generate(line[:type_check_response]) %></code></pre>
99
+ </div>
100
+
101
+ <div class="section">
102
+ <strong>Prompt:</strong>
103
+ <pre><code class="language-markdown"><%= line[:input] %></code></pre>
104
+ </div>
105
+
106
+ <div class="section">
107
+ <strong>原始输出:</strong>
108
+ <pre><code class="language-markdown"><%= line[:output] %></code></pre>
109
+ </div>
110
+
111
+ <div class="section">
112
+ <strong>格式化输出:</strong>
113
+ <pre><code class="language-json"><%= JSON.pretty_generate(line[:output_json]) %></code></pre>
114
+ </div>
115
+
116
+ <div class="section">
117
+ <strong>目标结果:</strong>
118
+ <pre><code class="language-json"><%= line[:target] %></code></pre>
119
+ </div>
120
+ </div>
121
+ </div>
122
+ <% end %>
123
+ </div>
124
+ </body>
125
+ </html>
data/lib/kaba/row.rb ADDED
@@ -0,0 +1,7 @@
1
+ class Row
2
+ attr_reader :target_path, :input_file
3
+ def initialize(file)
4
+ @target_path = File.expand_path(file)
5
+ @input_file = @target_path.sub(/\.target\.json$/, '.input.txt')
6
+ end
7
+ end
@@ -0,0 +1,109 @@
1
+ require 'erb'
2
+
3
+ class TestRunner
4
+ def initialize(path, schema:, type_name:, prompt:, validate:)
5
+
6
+ @test_files = Dir.glob(File.join(File.expand_path(path), '*.target.json'))
7
+ @lines = []
8
+ @schema = schema
9
+ @type_name = type_name
10
+ @prompt = prompt || Prompt.new(@schema, @type_name)
11
+ @validate = validate || Validate.new(schema: @schema, type_name: @type_name)
12
+
13
+ @type_right_total = 0
14
+ @score_total = 0
15
+
16
+ end
17
+
18
+ def _each(limit: nil)
19
+ @test_files.first(limit || @test_files.size).each do |file|
20
+ yield(Row.new(file), self)
21
+ end
22
+ end
23
+
24
+ def scan(
25
+ limit: nil,
26
+ model: 'spark-general-4.0',
27
+ judge_model: 'spark-general-4.0',
28
+ judge_temperature: 0.1,
29
+ temperature: 0.1
30
+ )
31
+
32
+ progressbar = TTY::ProgressBar.new(
33
+ "Test: [:bar] :percent :current/:total",
34
+ total: @test_files.first(limit || @test_files.size).size
35
+ )
36
+
37
+ progressbar.start
38
+
39
+ Async do
40
+ _each(limit: limit) do |row|
41
+ Async do |task|
42
+ input = @prompt.render(File.read row.input_file)
43
+
44
+ target = <<~Markdown
45
+ ```json
46
+ #{JSON.pretty_generate(JSON.parse(File.read(row.target_path)))}
47
+ ```
48
+ Markdown
49
+ output = Application.llm_client.chat(
50
+ parameters: {
51
+ model: model,
52
+ messages: [ { role: 'user', content: input } ],
53
+ temperature: temperature,
54
+ }
55
+ ).dig("choices", 0, "message", "content")
56
+
57
+
58
+ output_json = JSON.parse_llm_response output
59
+
60
+ type_check_response = JSON.parse @validate.run(output_json).body
61
+ @type_right_total += 1 if type_check_response["success"]
62
+
63
+ judge_input = Judge.new(input: input, output: output, target: target).render
64
+ judge_response = Application.llm_client.chat(
65
+ parameters: {
66
+ model: judge_model,
67
+ messages: [ { role: 'user', content: judge_input } ],
68
+ temperature: judge_temperature,
69
+ }
70
+ ).dig("choices", 0, "message", "content")
71
+
72
+ judge_json = JSON.parse_llm_response judge_response
73
+ @score_total += judge_json["score"].to_i
74
+
75
+ @lines << {
76
+ row: row,
77
+ input: input,
78
+ output: output,
79
+ output_json: output_json,
80
+ type_check_response: type_check_response,
81
+ target: target,
82
+ judge_response: judge_response,
83
+ judge_json: judge_json,
84
+ }
85
+
86
+ progressbar.advance
87
+ end
88
+ end
89
+ end.wait
90
+ end
91
+
92
+ def save(file_path)
93
+ File.open(File.expand_path(file_path), 'w') do |file|
94
+ file.puts ERB.new(File.read(self.class.report_template_path)).result(binding)
95
+ end
96
+ end
97
+
98
+
99
+ class << self
100
+ def report_template_path
101
+ @report_template_path || File.join(__dir__, 'report.html.erb')
102
+ end
103
+
104
+ def report_template_path=(path)
105
+ @report_template_path = path
106
+ end
107
+ end
108
+
109
+ end
data/lib/kaba/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kaba
4
- VERSION = "0.2.3"
4
+ VERSION = "0.3.0"
5
5
  end
data/lib/kaba.rb CHANGED
@@ -5,12 +5,20 @@ require 'faraday'
5
5
  require 'colorize'
6
6
  require 'tty-progressbar'
7
7
  require 'async/http/faraday'
8
+ require 'openai'
9
+ require 'json/repair'
8
10
 
11
+ require_relative "kaba/application"
12
+ require_relative "kaba/json"
13
+
14
+ require_relative "kaba/row"
9
15
  require_relative "kaba/version"
10
16
  require_relative "kaba/dataset"
11
17
  require_relative "kaba/dataset_source"
12
18
  require_relative "kaba/prompt"
13
19
  require_relative "kaba/validate"
20
+ require_relative "kaba/judge"
21
+ require_relative "kaba/test_runner"
14
22
 
15
23
  module Kaba
16
24
  class Error < StandardError; end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kaba
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - MJ
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-11-14 00:00:00.000000000 Z
11
+ date: 2024-11-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: async
@@ -94,6 +94,34 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '3.1'
97
+ - !ruby/object:Gem::Dependency
98
+ name: ruby-openai
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '7.3'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '7.3'
111
+ - !ruby/object:Gem::Dependency
112
+ name: json-repair
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 0.2.0
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 0.2.0
97
125
  description: 用来做数据集的工具
98
126
  email:
99
127
  - tywf91@gmail.com
@@ -110,9 +138,17 @@ files:
110
138
  - kaba.gemspec
111
139
  - lib/kaba.rb
112
140
  - lib/kaba/_DPodfile_
141
+ - lib/kaba/_DTestfile_
142
+ - lib/kaba/application.rb
113
143
  - lib/kaba/dataset.rb
114
144
  - lib/kaba/dataset_source.rb
145
+ - lib/kaba/json.rb
146
+ - lib/kaba/judge.md.erb
147
+ - lib/kaba/judge.rb
115
148
  - lib/kaba/prompt.rb
149
+ - lib/kaba/report.html.erb
150
+ - lib/kaba/row.rb
151
+ - lib/kaba/test_runner.rb
116
152
  - lib/kaba/validate.rb
117
153
  - lib/kaba/version.rb
118
154
  - sig/kaba.rbs