lex-dataset 0.2.4 → 0.2.6
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0114e9e266c0a6f2d4d88fefcf535a91bae559db9784d03a775dbdc1c9b95bf9
|
|
4
|
+
data.tar.gz: 28b59c1e639d99123f54d1f5630fd02df62c9b6912da06679d917dede310e71b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b9be421ed0c919084fd388f151e465f98e08b095e0bf1756358d2ec9bd813d88654a0dc0f60a5fbeb09cd3c1c9c058f10c056ae79011c5bae9a4cea42b862454
|
|
7
|
+
data.tar.gz: f0ed08bcc4d021ae90d489b3bce2fd1a47629bfe7ea8e55538945afc8ad6a8052f88316452d6d28fcafa69e952a018089b3ede1f42649e0e9216f4e4698afb4d
|
|
@@ -8,6 +8,12 @@ module Legion
|
|
|
8
8
|
module Dataset
|
|
9
9
|
module Runners
|
|
10
10
|
module Dataset
|
|
11
|
+
extend self
|
|
12
|
+
|
|
13
|
+
def self.remote_invocable?
|
|
14
|
+
false
|
|
15
|
+
end
|
|
16
|
+
|
|
11
17
|
def create_dataset(name:, description: nil, rows: [], **)
|
|
12
18
|
ds_id = db[:datasets].insert(name: name, description: description, created_at: Time.now.utc)
|
|
13
19
|
create_version(ds_id, rows)
|
|
@@ -62,7 +68,7 @@ module Legion
|
|
|
62
68
|
return { error: 'legion-llm is not available' } unless llm_available?
|
|
63
69
|
|
|
64
70
|
rows = call_llm_for_rows(description: description, count: count, schema: schema, model: model)
|
|
65
|
-
return rows if rows.is_a?(Hash) && rows[:error]
|
|
71
|
+
return rows if rows.is_a?(Hash) && rows[:error] # rubocop:disable Legion/Extension/RunnerReturnHash
|
|
66
72
|
|
|
67
73
|
result = create_dataset(name: name, description: description, rows: rows)
|
|
68
74
|
result.merge(generated: true)
|
|
@@ -92,14 +98,14 @@ module Legion
|
|
|
92
98
|
|
|
93
99
|
def invoke_llm(prompt:, **llm_opts)
|
|
94
100
|
result = if Legion::LLM.respond_to?(:structured)
|
|
95
|
-
Legion::LLM.structured(
|
|
101
|
+
Legion::LLM.structured( # rubocop:disable Legion/HelperMigration/DirectLlm
|
|
96
102
|
message: prompt,
|
|
97
103
|
schema: generate_schema,
|
|
98
104
|
caller: { extension: 'lex-dataset', operation: 'generate' },
|
|
99
105
|
**llm_opts
|
|
100
106
|
)
|
|
101
107
|
else
|
|
102
|
-
Legion::LLM.chat(message: prompt, caller: { extension: 'lex-dataset', operation: 'generate' }, **llm_opts)
|
|
108
|
+
Legion::LLM.chat(message: prompt, caller: { extension: 'lex-dataset', operation: 'generate' }, **llm_opts) # rubocop:disable Legion/HelperMigration/DirectLlm
|
|
103
109
|
end
|
|
104
110
|
content = result.respond_to?(:content) ? result.content : result.to_s
|
|
105
111
|
content.strip.sub(/\A```(?:json)?\n?/, '').sub(/\n?```\z/, '')
|
|
@@ -107,13 +113,13 @@ module Legion
|
|
|
107
113
|
|
|
108
114
|
def parse_llm_rows(content)
|
|
109
115
|
parsed = ::JSON.parse(content)
|
|
110
|
-
return nil unless parsed.is_a?(Array)
|
|
116
|
+
return nil unless parsed.is_a?(Array) # rubocop:disable Legion/Extension/RunnerReturnHash
|
|
111
117
|
|
|
112
118
|
parsed.map do |item|
|
|
113
119
|
h = item.transform_keys(&:to_sym)
|
|
114
120
|
{ input: h[:input].to_s, expected_output: h[:expected_output]&.to_s }
|
|
115
121
|
end
|
|
116
|
-
rescue ::JSON::ParserError
|
|
122
|
+
rescue ::JSON::ParserError => _e
|
|
117
123
|
nil
|
|
118
124
|
end
|
|
119
125
|
|
|
@@ -126,7 +132,8 @@ module Legion
|
|
|
126
132
|
if schema
|
|
127
133
|
lines << ''
|
|
128
134
|
lines << 'Schema guidance for inputs and outputs:'
|
|
129
|
-
lines <<
|
|
135
|
+
lines << ''
|
|
136
|
+
lines << ::JSON.generate(schema)
|
|
130
137
|
end
|
|
131
138
|
lines << ''
|
|
132
139
|
lines << 'Respond ONLY with a valid JSON array, no other text.'
|
|
@@ -7,6 +7,8 @@ module Legion
|
|
|
7
7
|
module Dataset
|
|
8
8
|
module Runners
|
|
9
9
|
module Experiment
|
|
10
|
+
extend self
|
|
11
|
+
|
|
10
12
|
def run_experiment(name:, dataset_name:, task_callable:, dataset_version: nil, evaluators: [], **)
|
|
11
13
|
ds = get_dataset(name: dataset_name, version: dataset_version)
|
|
12
14
|
return { error: ds[:error] } if ds[:error]
|
|
@@ -69,7 +71,7 @@ module Legion
|
|
|
69
71
|
|
|
70
72
|
def load_experiment_results(name)
|
|
71
73
|
exp = db[:experiments].where(name: name).first
|
|
72
|
-
return nil unless exp
|
|
74
|
+
return nil unless exp # rubocop:disable Legion/Extension/RunnerReturnHash
|
|
73
75
|
|
|
74
76
|
db[:experiment_results].where(experiment_id: exp[:id]).order(:row_index).all
|
|
75
77
|
end
|
|
@@ -5,6 +5,8 @@ module Legion
|
|
|
5
5
|
module Dataset
|
|
6
6
|
module Runners
|
|
7
7
|
module Sampling
|
|
8
|
+
extend self
|
|
9
|
+
|
|
8
10
|
def sample_from_traces(dataset_name:, source: :legion_data, filters: {},
|
|
9
11
|
sample_size: nil, strategy: :recent, **)
|
|
10
12
|
traces = fetch_traces(source, filters)
|
|
@@ -52,7 +54,7 @@ module Legion
|
|
|
52
54
|
|
|
53
55
|
def sample_error_biased(traces, size)
|
|
54
56
|
errors, successes = traces.partition { |t| t[:status] == 'error' }
|
|
55
|
-
return traces unless size
|
|
57
|
+
return traces unless size # rubocop:disable Legion/Extension/RunnerReturnHash
|
|
56
58
|
|
|
57
59
|
half = size / 2
|
|
58
60
|
(errors.first(half) + successes.first(size - half)).first(size)
|
|
@@ -60,7 +62,7 @@ module Legion
|
|
|
60
62
|
|
|
61
63
|
def sample_stratified(traces, size)
|
|
62
64
|
groups = traces.group_by { |t| t[:span_kind] }
|
|
63
|
-
return traces unless size
|
|
65
|
+
return traces unless size # rubocop:disable Legion/Extension/RunnerReturnHash
|
|
64
66
|
|
|
65
67
|
per_group = [size / [groups.size, 1].max, 1].max
|
|
66
68
|
groups.values.flat_map { |g| g.first(per_group) }.first(size)
|