lex-dataset 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 83cb04fc58c23760cc47ac99d49d2b2718754dbac8542388cc8c2cda55bb7fdc
|
|
4
|
+
data.tar.gz: 73e9f11620a34dcab3676db6fc1ed65da74cb05b2e3c26e46f4c171424b4ba67
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 78b6277e1931b68c34880789019e45edeca1634a0e4e563d7dd4ec48747c11e379509641cf16303f7a9893ca9bb6f479d8b3d780294f2626481081514092369b
|
|
7
|
+
data.tar.gz: 38c2f8b14e600146011c83f198b93b5fe89b9ef3cf1c6cdfa933a8b51217ce79b70d065430b212bf6cf16f15aaa44d824236fee659ffca1891cf612c0d2b7e79
|
|
@@ -8,6 +8,8 @@ module Legion
|
|
|
8
8
|
module Dataset
|
|
9
9
|
module Runners
|
|
10
10
|
module Dataset
|
|
11
|
+
extend self
|
|
12
|
+
|
|
11
13
|
def create_dataset(name:, description: nil, rows: [], **)
|
|
12
14
|
ds_id = db[:datasets].insert(name: name, description: description, created_at: Time.now.utc)
|
|
13
15
|
create_version(ds_id, rows)
|
|
@@ -62,7 +64,7 @@ module Legion
|
|
|
62
64
|
return { error: 'legion-llm is not available' } unless llm_available?
|
|
63
65
|
|
|
64
66
|
rows = call_llm_for_rows(description: description, count: count, schema: schema, model: model)
|
|
65
|
-
return rows if rows.is_a?(Hash) && rows[:error]
|
|
67
|
+
return rows if rows.is_a?(Hash) && rows[:error] # rubocop:disable Legion/Extension/RunnerReturnHash
|
|
66
68
|
|
|
67
69
|
result = create_dataset(name: name, description: description, rows: rows)
|
|
68
70
|
result.merge(generated: true)
|
|
@@ -92,14 +94,14 @@ module Legion
|
|
|
92
94
|
|
|
93
95
|
def invoke_llm(prompt:, **llm_opts)
|
|
94
96
|
result = if Legion::LLM.respond_to?(:structured)
|
|
95
|
-
Legion::LLM.structured(
|
|
97
|
+
Legion::LLM.structured( # rubocop:disable Legion/HelperMigration/DirectLlm
|
|
96
98
|
message: prompt,
|
|
97
99
|
schema: generate_schema,
|
|
98
100
|
caller: { extension: 'lex-dataset', operation: 'generate' },
|
|
99
101
|
**llm_opts
|
|
100
102
|
)
|
|
101
103
|
else
|
|
102
|
-
Legion::LLM.chat(message: prompt, caller: { extension: 'lex-dataset', operation: 'generate' }, **llm_opts)
|
|
104
|
+
Legion::LLM.chat(message: prompt, caller: { extension: 'lex-dataset', operation: 'generate' }, **llm_opts) # rubocop:disable Legion/HelperMigration/DirectLlm
|
|
103
105
|
end
|
|
104
106
|
content = result.respond_to?(:content) ? result.content : result.to_s
|
|
105
107
|
content.strip.sub(/\A```(?:json)?\n?/, '').sub(/\n?```\z/, '')
|
|
@@ -107,13 +109,13 @@ module Legion
|
|
|
107
109
|
|
|
108
110
|
def parse_llm_rows(content)
|
|
109
111
|
parsed = ::JSON.parse(content)
|
|
110
|
-
return nil unless parsed.is_a?(Array)
|
|
112
|
+
return nil unless parsed.is_a?(Array) # rubocop:disable Legion/Extension/RunnerReturnHash
|
|
111
113
|
|
|
112
114
|
parsed.map do |item|
|
|
113
115
|
h = item.transform_keys(&:to_sym)
|
|
114
116
|
{ input: h[:input].to_s, expected_output: h[:expected_output]&.to_s }
|
|
115
117
|
end
|
|
116
|
-
rescue ::JSON::ParserError
|
|
118
|
+
rescue ::JSON::ParserError => _e
|
|
117
119
|
nil
|
|
118
120
|
end
|
|
119
121
|
|
|
@@ -7,6 +7,8 @@ module Legion
|
|
|
7
7
|
module Dataset
|
|
8
8
|
module Runners
|
|
9
9
|
module Experiment
|
|
10
|
+
extend self
|
|
11
|
+
|
|
10
12
|
def run_experiment(name:, dataset_name:, task_callable:, dataset_version: nil, evaluators: [], **)
|
|
11
13
|
ds = get_dataset(name: dataset_name, version: dataset_version)
|
|
12
14
|
return { error: ds[:error] } if ds[:error]
|
|
@@ -69,7 +71,7 @@ module Legion
|
|
|
69
71
|
|
|
70
72
|
def load_experiment_results(name)
|
|
71
73
|
exp = db[:experiments].where(name: name).first
|
|
72
|
-
return nil unless exp
|
|
74
|
+
return nil unless exp # rubocop:disable Legion/Extension/RunnerReturnHash
|
|
73
75
|
|
|
74
76
|
db[:experiment_results].where(experiment_id: exp[:id]).order(:row_index).all
|
|
75
77
|
end
|
|
@@ -5,6 +5,8 @@ module Legion
|
|
|
5
5
|
module Dataset
|
|
6
6
|
module Runners
|
|
7
7
|
module Sampling
|
|
8
|
+
extend self
|
|
9
|
+
|
|
8
10
|
def sample_from_traces(dataset_name:, source: :legion_data, filters: {},
|
|
9
11
|
sample_size: nil, strategy: :recent, **)
|
|
10
12
|
traces = fetch_traces(source, filters)
|
|
@@ -52,7 +54,7 @@ module Legion
|
|
|
52
54
|
|
|
53
55
|
def sample_error_biased(traces, size)
|
|
54
56
|
errors, successes = traces.partition { |t| t[:status] == 'error' }
|
|
55
|
-
return traces unless size
|
|
57
|
+
return traces unless size # rubocop:disable Legion/Extension/RunnerReturnHash
|
|
56
58
|
|
|
57
59
|
half = size / 2
|
|
58
60
|
(errors.first(half) + successes.first(size - half)).first(size)
|
|
@@ -60,7 +62,7 @@ module Legion
|
|
|
60
62
|
|
|
61
63
|
def sample_stratified(traces, size)
|
|
62
64
|
groups = traces.group_by { |t| t[:span_kind] }
|
|
63
|
-
return traces unless size
|
|
65
|
+
return traces unless size # rubocop:disable Legion/Extension/RunnerReturnHash
|
|
64
66
|
|
|
65
67
|
per_group = [size / [groups.size, 1].max, 1].max
|
|
66
68
|
groups.values.flat_map { |g| g.first(per_group) }.first(size)
|