lex-dataset 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fd78edb40ac84145152b707d558dafa107c0bd9ae45995c4b035e17e55392989
4
- data.tar.gz: 61951016c2bc00670d0f58e7157150f50693e7e7da06b863a1bf7085a6a9fc20
3
+ metadata.gz: 0114e9e266c0a6f2d4d88fefcf535a91bae559db9784d03a775dbdc1c9b95bf9
4
+ data.tar.gz: 28b59c1e639d99123f54d1f5630fd02df62c9b6912da06679d917dede310e71b
5
5
  SHA512:
6
- metadata.gz: ca56908b97943c80ddd750383930de3b7700cfcd86ca9e6eae9b5a3af42f44a74df69e5d9f97dfb4544213dc2b41243c01ac3e4956ae56e2cd53169fd480d1bf
7
- data.tar.gz: f4dda491c496cffd270a451ca704b8fa2b4ba231d7a6c411ed5c00fc8cb12965a5429a19b0df4b6a7f16f821d291328f9754e501f938da8fb324dcd2710a4b8a
6
+ metadata.gz: b9be421ed0c919084fd388f151e465f98e08b095e0bf1756358d2ec9bd813d88654a0dc0f60a5fbeb09cd3c1c9c058f10c056ae79011c5bae9a4cea42b862454
7
+ data.tar.gz: f0ed08bcc4d021ae90d489b3bce2fd1a47629bfe7ea8e55538945afc8ad6a8052f88316452d6d28fcafa69e952a018089b3ede1f42649e0e9216f4e4698afb4d
@@ -8,6 +8,12 @@ module Legion
8
8
  module Dataset
9
9
  module Runners
10
10
  module Dataset
11
+ extend self
12
+
13
+ def self.remote_invocable?
14
+ false
15
+ end
16
+
11
17
  def create_dataset(name:, description: nil, rows: [], **)
12
18
  ds_id = db[:datasets].insert(name: name, description: description, created_at: Time.now.utc)
13
19
  create_version(ds_id, rows)
@@ -62,7 +68,7 @@ module Legion
62
68
  return { error: 'legion-llm is not available' } unless llm_available?
63
69
 
64
70
  rows = call_llm_for_rows(description: description, count: count, schema: schema, model: model)
65
- return rows if rows.is_a?(Hash) && rows[:error]
71
+ return rows if rows.is_a?(Hash) && rows[:error] # rubocop:disable Legion/Extension/RunnerReturnHash
66
72
 
67
73
  result = create_dataset(name: name, description: description, rows: rows)
68
74
  result.merge(generated: true)
@@ -92,14 +98,14 @@ module Legion
92
98
 
93
99
  def invoke_llm(prompt:, **llm_opts)
94
100
  result = if Legion::LLM.respond_to?(:structured)
95
- Legion::LLM.structured(
101
+ Legion::LLM.structured( # rubocop:disable Legion/HelperMigration/DirectLlm
96
102
  message: prompt,
97
103
  schema: generate_schema,
98
104
  caller: { extension: 'lex-dataset', operation: 'generate' },
99
105
  **llm_opts
100
106
  )
101
107
  else
102
- Legion::LLM.chat(message: prompt, caller: { extension: 'lex-dataset', operation: 'generate' }, **llm_opts)
108
+ Legion::LLM.chat(message: prompt, caller: { extension: 'lex-dataset', operation: 'generate' }, **llm_opts) # rubocop:disable Legion/HelperMigration/DirectLlm
103
109
  end
104
110
  content = result.respond_to?(:content) ? result.content : result.to_s
105
111
  content.strip.sub(/\A```(?:json)?\n?/, '').sub(/\n?```\z/, '')
@@ -107,13 +113,13 @@ module Legion
107
113
 
108
114
  def parse_llm_rows(content)
109
115
  parsed = ::JSON.parse(content)
110
- return nil unless parsed.is_a?(Array)
116
+ return nil unless parsed.is_a?(Array) # rubocop:disable Legion/Extension/RunnerReturnHash
111
117
 
112
118
  parsed.map do |item|
113
119
  h = item.transform_keys(&:to_sym)
114
120
  { input: h[:input].to_s, expected_output: h[:expected_output]&.to_s }
115
121
  end
116
- rescue ::JSON::ParserError
122
+ rescue ::JSON::ParserError => _e
117
123
  nil
118
124
  end
119
125
 
@@ -126,7 +132,8 @@ module Legion
126
132
  if schema
127
133
  lines << ''
128
134
  lines << 'Schema guidance for inputs and outputs:'
129
- lines << "```json\n#{::JSON.generate(schema)}\n```"
135
+ lines << ''
136
+ lines << ::JSON.generate(schema)
130
137
  end
131
138
  lines << ''
132
139
  lines << 'Respond ONLY with a valid JSON array, no other text.'
@@ -7,6 +7,8 @@ module Legion
7
7
  module Dataset
8
8
  module Runners
9
9
  module Experiment
10
+ extend self
11
+
10
12
  def run_experiment(name:, dataset_name:, task_callable:, dataset_version: nil, evaluators: [], **)
11
13
  ds = get_dataset(name: dataset_name, version: dataset_version)
12
14
  return { error: ds[:error] } if ds[:error]
@@ -69,7 +71,7 @@ module Legion
69
71
 
70
72
  def load_experiment_results(name)
71
73
  exp = db[:experiments].where(name: name).first
72
- return nil unless exp
74
+ return nil unless exp # rubocop:disable Legion/Extension/RunnerReturnHash
73
75
 
74
76
  db[:experiment_results].where(experiment_id: exp[:id]).order(:row_index).all
75
77
  end
@@ -5,6 +5,8 @@ module Legion
5
5
  module Dataset
6
6
  module Runners
7
7
  module Sampling
8
+ extend self
9
+
8
10
  def sample_from_traces(dataset_name:, source: :legion_data, filters: {},
9
11
  sample_size: nil, strategy: :recent, **)
10
12
  traces = fetch_traces(source, filters)
@@ -52,7 +54,7 @@ module Legion
52
54
 
53
55
  def sample_error_biased(traces, size)
54
56
  errors, successes = traces.partition { |t| t[:status] == 'error' }
55
- return traces unless size
57
+ return traces unless size # rubocop:disable Legion/Extension/RunnerReturnHash
56
58
 
57
59
  half = size / 2
58
60
  (errors.first(half) + successes.first(size - half)).first(size)
@@ -60,7 +62,7 @@ module Legion
60
62
 
61
63
  def sample_stratified(traces, size)
62
64
  groups = traces.group_by { |t| t[:span_kind] }
63
- return traces unless size
65
+ return traces unless size # rubocop:disable Legion/Extension/RunnerReturnHash
64
66
 
65
67
  per_group = [size / [groups.size, 1].max, 1].max
66
68
  groups.values.flat_map { |g| g.first(per_group) }.first(size)
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Dataset
6
- VERSION = '0.2.4'
6
+ VERSION = '0.2.6'
7
7
  end
8
8
  end
9
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-dataset
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matthew Iverson