lex-dataset 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fd78edb40ac84145152b707d558dafa107c0bd9ae45995c4b035e17e55392989
4
- data.tar.gz: 61951016c2bc00670d0f58e7157150f50693e7e7da06b863a1bf7085a6a9fc20
3
+ metadata.gz: 83cb04fc58c23760cc47ac99d49d2b2718754dbac8542388cc8c2cda55bb7fdc
4
+ data.tar.gz: 73e9f11620a34dcab3676db6fc1ed65da74cb05b2e3c26e46f4c171424b4ba67
5
5
  SHA512:
6
- metadata.gz: ca56908b97943c80ddd750383930de3b7700cfcd86ca9e6eae9b5a3af42f44a74df69e5d9f97dfb4544213dc2b41243c01ac3e4956ae56e2cd53169fd480d1bf
7
- data.tar.gz: f4dda491c496cffd270a451ca704b8fa2b4ba231d7a6c411ed5c00fc8cb12965a5429a19b0df4b6a7f16f821d291328f9754e501f938da8fb324dcd2710a4b8a
6
+ metadata.gz: 78b6277e1931b68c34880789019e45edeca1634a0e4e563d7dd4ec48747c11e379509641cf16303f7a9893ca9bb6f479d8b3d780294f2626481081514092369b
7
+ data.tar.gz: 38c2f8b14e600146011c83f198b93b5fe89b9ef3cf1c6cdfa933a8b51217ce79b70d065430b212bf6cf16f15aaa44d824236fee659ffca1891cf612c0d2b7e79
@@ -8,6 +8,8 @@ module Legion
8
8
  module Dataset
9
9
  module Runners
10
10
  module Dataset
11
+ extend self
12
+
11
13
  def create_dataset(name:, description: nil, rows: [], **)
12
14
  ds_id = db[:datasets].insert(name: name, description: description, created_at: Time.now.utc)
13
15
  create_version(ds_id, rows)
@@ -62,7 +64,7 @@ module Legion
62
64
  return { error: 'legion-llm is not available' } unless llm_available?
63
65
 
64
66
  rows = call_llm_for_rows(description: description, count: count, schema: schema, model: model)
65
- return rows if rows.is_a?(Hash) && rows[:error]
67
+ return rows if rows.is_a?(Hash) && rows[:error] # rubocop:disable Legion/Extension/RunnerReturnHash
66
68
 
67
69
  result = create_dataset(name: name, description: description, rows: rows)
68
70
  result.merge(generated: true)
@@ -92,14 +94,14 @@ module Legion
92
94
 
93
95
  def invoke_llm(prompt:, **llm_opts)
94
96
  result = if Legion::LLM.respond_to?(:structured)
95
- Legion::LLM.structured(
97
+ Legion::LLM.structured( # rubocop:disable Legion/HelperMigration/DirectLlm
96
98
  message: prompt,
97
99
  schema: generate_schema,
98
100
  caller: { extension: 'lex-dataset', operation: 'generate' },
99
101
  **llm_opts
100
102
  )
101
103
  else
102
- Legion::LLM.chat(message: prompt, caller: { extension: 'lex-dataset', operation: 'generate' }, **llm_opts)
104
+ Legion::LLM.chat(message: prompt, caller: { extension: 'lex-dataset', operation: 'generate' }, **llm_opts) # rubocop:disable Legion/HelperMigration/DirectLlm
103
105
  end
104
106
  content = result.respond_to?(:content) ? result.content : result.to_s
105
107
  content.strip.sub(/\A```(?:json)?\n?/, '').sub(/\n?```\z/, '')
@@ -107,13 +109,13 @@ module Legion
107
109
 
108
110
  def parse_llm_rows(content)
109
111
  parsed = ::JSON.parse(content)
110
- return nil unless parsed.is_a?(Array)
112
+ return nil unless parsed.is_a?(Array) # rubocop:disable Legion/Extension/RunnerReturnHash
111
113
 
112
114
  parsed.map do |item|
113
115
  h = item.transform_keys(&:to_sym)
114
116
  { input: h[:input].to_s, expected_output: h[:expected_output]&.to_s }
115
117
  end
116
- rescue ::JSON::ParserError
118
+ rescue ::JSON::ParserError => _e
117
119
  nil
118
120
  end
119
121
 
@@ -7,6 +7,8 @@ module Legion
7
7
  module Dataset
8
8
  module Runners
9
9
  module Experiment
10
+ extend self
11
+
10
12
  def run_experiment(name:, dataset_name:, task_callable:, dataset_version: nil, evaluators: [], **)
11
13
  ds = get_dataset(name: dataset_name, version: dataset_version)
12
14
  return { error: ds[:error] } if ds[:error]
@@ -69,7 +71,7 @@ module Legion
69
71
 
70
72
  def load_experiment_results(name)
71
73
  exp = db[:experiments].where(name: name).first
72
- return nil unless exp
74
+ return nil unless exp # rubocop:disable Legion/Extension/RunnerReturnHash
73
75
 
74
76
  db[:experiment_results].where(experiment_id: exp[:id]).order(:row_index).all
75
77
  end
@@ -5,6 +5,8 @@ module Legion
5
5
  module Dataset
6
6
  module Runners
7
7
  module Sampling
8
+ extend self
9
+
8
10
  def sample_from_traces(dataset_name:, source: :legion_data, filters: {},
9
11
  sample_size: nil, strategy: :recent, **)
10
12
  traces = fetch_traces(source, filters)
@@ -52,7 +54,7 @@ module Legion
52
54
 
53
55
  def sample_error_biased(traces, size)
54
56
  errors, successes = traces.partition { |t| t[:status] == 'error' }
55
- return traces unless size
57
+ return traces unless size # rubocop:disable Legion/Extension/RunnerReturnHash
56
58
 
57
59
  half = size / 2
58
60
  (errors.first(half) + successes.first(size - half)).first(size)
@@ -60,7 +62,7 @@ module Legion
60
62
 
61
63
  def sample_stratified(traces, size)
62
64
  groups = traces.group_by { |t| t[:span_kind] }
63
- return traces unless size
65
+ return traces unless size # rubocop:disable Legion/Extension/RunnerReturnHash
64
66
 
65
67
  per_group = [size / [groups.size, 1].max, 1].max
66
68
  groups.values.flat_map { |g| g.first(per_group) }.first(size)
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Dataset
6
- VERSION = '0.2.4'
6
+ VERSION = '0.2.5'
7
7
  end
8
8
  end
9
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-dataset
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matthew Iverson