desiru 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env.example +34 -0
- data/.rubocop.yml +7 -4
- data/.ruby-version +1 -0
- data/CLAUDE.md +4 -0
- data/Gemfile +21 -2
- data/Gemfile.lock +87 -12
- data/README.md +295 -2
- data/Rakefile +1 -0
- data/db/migrations/001_create_initial_tables.rb +96 -0
- data/db/migrations/002_create_job_results.rb +39 -0
- data/desiru.db +0 -0
- data/desiru.gemspec +2 -5
- data/docs/background_processing_roadmap.md +87 -0
- data/docs/job_scheduling.md +167 -0
- data/dspy-analysis-swarm.yml +60 -0
- data/dspy-feature-analysis.md +121 -0
- data/examples/README.md +69 -0
- data/examples/api_with_persistence.rb +122 -0
- data/examples/assertions_example.rb +232 -0
- data/examples/async_processing.rb +2 -0
- data/examples/few_shot_learning.rb +1 -2
- data/examples/graphql_api.rb +4 -2
- data/examples/graphql_integration.rb +3 -3
- data/examples/graphql_optimization_summary.md +143 -0
- data/examples/graphql_performance_benchmark.rb +247 -0
- data/examples/persistence_example.rb +102 -0
- data/examples/react_agent.rb +203 -0
- data/examples/rest_api.rb +173 -0
- data/examples/rest_api_advanced.rb +333 -0
- data/examples/scheduled_job_example.rb +116 -0
- data/examples/simple_qa.rb +1 -2
- data/examples/sinatra_api.rb +109 -0
- data/examples/typed_signatures.rb +1 -2
- data/graphql_optimization_summary.md +53 -0
- data/lib/desiru/api/grape_integration.rb +284 -0
- data/lib/desiru/api/persistence_middleware.rb +148 -0
- data/lib/desiru/api/sinatra_integration.rb +217 -0
- data/lib/desiru/api.rb +42 -0
- data/lib/desiru/assertions.rb +74 -0
- data/lib/desiru/async_status.rb +65 -0
- data/lib/desiru/cache.rb +1 -1
- data/lib/desiru/configuration.rb +2 -1
- data/lib/desiru/errors.rb +160 -0
- data/lib/desiru/field.rb +17 -14
- data/lib/desiru/graphql/batch_loader.rb +85 -0
- data/lib/desiru/graphql/data_loader.rb +242 -75
- data/lib/desiru/graphql/enum_builder.rb +75 -0
- data/lib/desiru/graphql/executor.rb +37 -4
- data/lib/desiru/graphql/schema_generator.rb +62 -158
- data/lib/desiru/graphql/type_builder.rb +138 -0
- data/lib/desiru/graphql/type_cache_warmer.rb +91 -0
- data/lib/desiru/jobs/async_predict.rb +1 -1
- data/lib/desiru/jobs/base.rb +67 -0
- data/lib/desiru/jobs/batch_processor.rb +6 -6
- data/lib/desiru/jobs/retriable.rb +119 -0
- data/lib/desiru/jobs/retry_strategies.rb +169 -0
- data/lib/desiru/jobs/scheduler.rb +219 -0
- data/lib/desiru/jobs/webhook_notifier.rb +242 -0
- data/lib/desiru/models/anthropic.rb +164 -0
- data/lib/desiru/models/base.rb +37 -3
- data/lib/desiru/models/open_ai.rb +151 -0
- data/lib/desiru/models/open_router.rb +161 -0
- data/lib/desiru/module.rb +59 -9
- data/lib/desiru/modules/chain_of_thought.rb +3 -3
- data/lib/desiru/modules/majority.rb +51 -0
- data/lib/desiru/modules/multi_chain_comparison.rb +204 -0
- data/lib/desiru/modules/predict.rb +8 -1
- data/lib/desiru/modules/program_of_thought.rb +139 -0
- data/lib/desiru/modules/react.rb +273 -0
- data/lib/desiru/modules/retrieve.rb +4 -2
- data/lib/desiru/optimizers/base.rb +2 -4
- data/lib/desiru/optimizers/bootstrap_few_shot.rb +2 -2
- data/lib/desiru/optimizers/copro.rb +268 -0
- data/lib/desiru/optimizers/knn_few_shot.rb +185 -0
- data/lib/desiru/persistence/database.rb +71 -0
- data/lib/desiru/persistence/models/api_request.rb +38 -0
- data/lib/desiru/persistence/models/job_result.rb +138 -0
- data/lib/desiru/persistence/models/module_execution.rb +37 -0
- data/lib/desiru/persistence/models/optimization_result.rb +28 -0
- data/lib/desiru/persistence/models/training_example.rb +25 -0
- data/lib/desiru/persistence/models.rb +11 -0
- data/lib/desiru/persistence/repositories/api_request_repository.rb +98 -0
- data/lib/desiru/persistence/repositories/base_repository.rb +77 -0
- data/lib/desiru/persistence/repositories/job_result_repository.rb +116 -0
- data/lib/desiru/persistence/repositories/module_execution_repository.rb +85 -0
- data/lib/desiru/persistence/repositories/optimization_result_repository.rb +67 -0
- data/lib/desiru/persistence/repositories/training_example_repository.rb +102 -0
- data/lib/desiru/persistence/repository.rb +29 -0
- data/lib/desiru/persistence/setup.rb +77 -0
- data/lib/desiru/persistence.rb +49 -0
- data/lib/desiru/registry.rb +3 -5
- data/lib/desiru/signature.rb +91 -24
- data/lib/desiru/version.rb +1 -1
- data/lib/desiru.rb +23 -8
- data/missing-features-analysis.md +192 -0
- metadata +63 -45
- data/lib/desiru/models/raix_adapter.rb +0 -210
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Desiru
|
4
|
+
module Persistence
|
5
|
+
module Models
|
6
|
+
# Model recording one run of a module.
#
# Rows come from the +module_executions+ dataset and may belong to an API
# request. +inputs+, +outputs+ and +metadata+ are declared through the
# +json_column+ macro (defined outside this file).
class ModuleExecution < Base
  set_dataset :module_executions
  many_to_one :api_request

  json_column :inputs
  json_column :outputs
  json_column :metadata

  # Runs the inherited validations, then requires +module_name+, +status+
  # and +started_at+ to be present and +status+ to be one of
  # pending / running / completed / failed.
  def validate
    super
    validates_presence [:module_name, :status, :started_at]
    validates_includes ['pending', 'running', 'completed', 'failed'], :status
  end

  # +finished_at+ minus +started_at+, or nil while either value is missing.
  def duration
    finished_at - started_at if started_at && finished_at
  end

  # True when the stored status is 'completed'.
  def success?
    status == 'completed'
  end

  # True when the stored status is 'failed'.
  def failed?
    status == 'failed'
  end
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Desiru
|
4
|
+
module Persistence
|
5
|
+
module Models
|
6
|
+
# Stores optimization results and metrics.
#
# Rows come from the +optimization_results+ dataset. +parameters+, +metrics+
# and +best_prompts+ are declared through the +json_column+ macro (defined
# outside this file).
class OptimizationResult < Base
  set_dataset :optimization_results
  json_column :parameters
  json_column :metrics
  json_column :best_prompts

  # Runs the inherited validations, then requires +module_name+,
  # +optimizer_type+ and +score+ to be present and +score+ to be numeric.
  def validate
    super
    validates_presence %i[module_name optimizer_type score]
    validates_numeric :score
    # NOTE(review): validates_min_length compares the value's +length+. If
    # training_size is stored as an integer count this is not a "minimum of 1"
    # check and may raise — confirm the column type against the migration.
    validates_min_length 1, :training_size if training_size
  end

  # Relative improvement of +score+ over +baseline_score+, as a percentage
  # rounded to two decimals.
  #
  # Returns nil when there is no baseline, when the score is missing or not
  # positive, or when the baseline is zero (the ratio is undefined and the
  # division would otherwise raise or yield Infinity/NaN).
  def improvement_percentage
    return nil unless baseline_score && score&.positive?
    # Guard the divisor: the previous check only looked at the numerator.
    return nil if baseline_score.zero?

    ((score - baseline_score) / baseline_score * 100).round(2)
  end
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Desiru
|
4
|
+
module Persistence
|
5
|
+
module Models
|
6
|
+
# Model holding one training example for a module.
#
# Rows come from the +training_examples+ dataset. +inputs+,
# +expected_outputs+ and +metadata+ are declared through the +json_column+
# macro (defined outside this file).
class TrainingExample < Base
  set_dataset :training_examples
  json_column :inputs
  json_column :expected_outputs
  json_column :metadata

  # Runs the inherited validations, then requires +module_name+ and +inputs+.
  # When a +dataset_type+ is set it must be training, validation or test.
  def validate
    super
    validates_presence [:module_name, :inputs]
    if dataset_type
      validates_includes ['training', 'validation', 'test'], :dataset_type
    end
  end

  # Whether the example has been used at least once. Returns nil (falsy)
  # when +used_count+ is nil.
  def used?
    used_count.nil? ? nil : used_count.positive?
  end
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'base_repository'
|
4
|
+
|
5
|
+
module Desiru
|
6
|
+
module Persistence
|
7
|
+
module Repositories
|
8
|
+
# Repository for API request records.
#
# Offers lookups by path, status code and recency, simple traffic metrics,
# and a helper that persists a Rack request/response pair.
class ApiRequestRepository < BaseRepository
  def initialize
    super(Models::ApiRequest)
  end

  # All requests whose +path+ equals the given value.
  def find_by_path(path)
    dataset.where(path: path).all
  end

  # The +limit+ most recently created requests, newest first.
  def recent(limit = 10)
    dataset
      .order(Sequel.desc(:created_at))
      .limit(limit)
      .all
  end

  # Requests whose status code lies in the inclusive range +min+..+max+.
  def by_status_code_range(min, max)
    dataset.where(status_code: min..max).all
  end

  # Requests answered with a status code from 200 to 299.
  def successful
    by_status_code_range(200, 299)
  end

  # Requests answered with a status code of 400 or above.
  def failed
    dataset.where { status_code >= 400 }.all
  end

  # Average of the non-null +response_time+ values, rounded to three
  # decimals and optionally limited to one +path+. Returns nil when the
  # database reports no average.
  def average_response_time(path = nil)
    scope = dataset
    scope = scope.where(path: path) if path
    scope = scope.exclude(response_time: nil)

    avg = scope.avg(:response_time)
    avg&.round(3)
  end

  # Requests created during the last +minutes_ago+ minutes, divided by
  # +minutes_ago+ and rounded to two decimals.
  def requests_per_minute(minutes_ago = 60)
    since = Time.now - (minutes_ago * 60)
    count = dataset.where { created_at >= since }.count

    (count.to_f / minutes_ago).round(2)
  end

  # The +limit+ most requested paths as an array of
  # <tt>{ path:, count: }</tt> hashes, most frequent first.
  def top_paths(limit = 10)
    dataset
      .group_and_count(:path)
      .order(Sequel.desc(:count))
      .limit(limit)
      .map { |row| { path: row[:path], count: row[:count] } }
  end

  # Persists one record built from a Rack-style request and response.
  # +response_time+ is taken from the response's X-Runtime header when set.
  def create_from_rack_request(request, response)
    create(
      method: request.request_method,
      path: request.path_info,
      remote_ip: request.ip,
      headers: extract_headers(request),
      params: request.params,
      status_code: response.status,
      response_body: extract_response_body(response),
      response_time: response.headers['X-Runtime']&.to_f
    )
  end

  private

  # Collects the request's HTTP_* entries into a hash keyed by the
  # conventional header spelling (HTTP_USER_AGENT becomes "User-Agent").
  def extract_headers(request)
    headers = {}
    request.each_header do |key, value|
      next unless key.start_with?('HTTP_')

      header_name = key.sub(/^HTTP_/, '').split('_').map(&:capitalize).join('-')
      headers[header_name] = value
    end
    headers
  end

  # Joins the parts of the response body into a single string.
  #
  # Only +each+ is used to walk the body, because that is the method checked
  # for below and the only iteration method a Rack body must provide; the
  # previous +map+ call failed on each-only bodies and the rescue then
  # silently dropped the body. Returns nil for bodies that cannot be iterated
  # and, as a deliberate best effort, when reading the body raises.
  #
  # NOTE(review): iterating may consume a streaming body — confirm callers
  # only pass buffered responses.
  def extract_response_body(response)
    body = response.body
    return nil unless body.respond_to?(:each)

    parts = []
    body.each { |part| parts << part }
    parts.join
  rescue StandardError
    nil
  end
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Desiru
|
4
|
+
module Persistence
|
5
|
+
module Repositories
|
6
|
+
# Generic repository exposing CRUD-style helpers on top of one model class.
#
# The model class is expected to respond to +dataset+ and +create+; every
# query below goes through that dataset.
class BaseRepository
  attr_reader :model_class

  # model_class - the model whose dataset backs this repository.
  def initialize(model_class)
    @model_class = model_class
  end

  # Every record in the dataset.
  def all
    dataset.all
  end

  # The record whose +id+ matches, or nil.
  def find(id)
    dataset.first(id: id)
  end

  # The first record matching +conditions+, or nil.
  def find_by(conditions)
    matches = dataset.where(conditions)
    matches.first
  end

  # All records matching +conditions+.
  def where(conditions)
    matches = dataset.where(conditions)
    matches.all
  end

  # Creates a record through the model class and returns it.
  def create(attributes)
    model_class.create(attributes)
  end

  # Applies +attributes+ to the record with the given id.
  # Returns the record, or nil when no such record exists.
  def update(id, attributes)
    find(id)&.tap { |found| found.update(attributes) }
  end

  # Destroys the record with the given id.
  # Returns true when a record was found and destroyed, false otherwise.
  def delete?(id)
    target = find(id)
    if target
      target.destroy
      true
    else
      false
    end
  end

  # Number of records in the dataset.
  def count
    dataset.count
  end

  # Whether at least one record matches +conditions+.
  def exists?(conditions)
    dataset.where(conditions).count > 0
  end

  # One page of records: +per_page+ rows after skipping the earlier pages.
  # Pages are numbered from 1.
  def paginate(page: 1, per_page: 20)
    skipped = (page - 1) * per_page
    dataset.limit(per_page).offset(skipped).all
  end

  protected

  # The dataset of the wrapped model class.
  def dataset
    model_class.dataset
  end

  # Runs the given block through Database.transaction.
  def transaction(&block)
    Database.transaction(&block)
  end
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'base_repository'
|
4
|
+
|
5
|
+
module Desiru
|
6
|
+
module Persistence
|
7
|
+
module Repositories
|
8
|
+
# Repository that persists and queries job results.
#
# Status transitions are delegated to the JobResult model; dataset helpers
# such as +expired+, +by_job_class+, +recent+ and the per-status scopes are
# expected to be provided by that model's dataset (not shown in this file).
class JobResultRepository < BaseRepository
  def initialize
    super(Models::JobResult)
  end

  # Creates a pending result row for a newly enqueued job.
  # +inputs+, when given, are stored as their JSON encoding.
  def create_for_job(job_id, job_class, queue, inputs: nil, expires_at: nil)
    attributes = {
      job_id: job_id,
      job_class: job_class,
      queue: queue,
      status: Models::JobResult::STATUS_PENDING,
      inputs: inputs&.to_json,
      enqueued_at: Time.now,
      expires_at: expires_at
    }
    create(attributes)
  end

  # The result row for +job_id+, or nil.
  def find_by_job_id(job_id)
    find_by(job_id: job_id)
  end

  # Marks the job as processing. Returns the row, or nil when unknown.
  def mark_processing(job_id)
    find_by_job_id(job_id)&.tap { |row| row.mark_as_processing! }
  end

  # Marks the job as completed with +result+. Returns the row, or nil when
  # unknown.
  def mark_completed(job_id, result, message: nil)
    find_by_job_id(job_id)&.tap { |row| row.mark_as_completed!(result, message: message) }
  end

  # Records a failure: status, finish time, error text and (joined)
  # backtrace. The retry counter is bumped unless +increment_retry+ is
  # false. Returns the row, or nil when unknown.
  def mark_failed(job_id, error, backtrace: nil, increment_retry: true)
    row = find_by_job_id(job_id)
    return nil if row.nil?

    changes = {
      status: Models::JobResult::STATUS_FAILED,
      finished_at: Time.now,
      error_message: error.to_s,
      error_backtrace: backtrace&.join("\n")
    }
    changes[:retry_count] = row.retry_count + 1 if increment_retry

    row.update(changes)
    row
  end

  # Stores a progress value for the job. Returns the row, or nil when
  # unknown.
  def update_progress(job_id, progress, message: nil)
    find_by_job_id(job_id)&.tap { |row| row.update_progress(progress, message: message) }
  end

  # Deletes every row selected by the dataset's +expired+ scope.
  def cleanup_expired
    dataset.expired.delete
  end

  # The +limit+ most recent rows for one job class.
  def recent_by_class(job_class, limit: 10)
    dataset.by_job_class(job_class).recent(limit).all
  end

  # Row counts per status plus the average duration of completed jobs,
  # optionally narrowed to a job class and/or rows created since a time.
  def statistics(job_class: nil, since: nil)
    scope = dataset
    scope = scope.by_job_class(job_class) if job_class
    scope = scope.where { created_at >= since } if since

    stats = { total: scope.count }
    %i[pending processing completed failed].each do |status_scope|
      stats[status_scope] = scope.public_send(status_scope).count
    end
    stats[:average_duration] = calculate_average_duration(scope)
    stats
  end

  private

  # Mean of (finished_at - started_at) over completed rows that have both
  # timestamps; 0 when there are none.
  def calculate_average_duration(scope)
    finished = scope.completed.exclude(started_at: nil).exclude(finished_at: nil)
    return 0 if finished.empty?

    durations = []
    finished.each do |job|
      durations << (job.finished_at - job.started_at) if job.started_at && job.finished_at
    end
    return 0 if durations.empty?

    # Plain left-to-right addition, matching a running total.
    durations.reduce(0) { |total, elapsed| total + elapsed } / durations.length
  end
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'base_repository'
|
4
|
+
|
5
|
+
module Desiru
|
6
|
+
module Persistence
|
7
|
+
module Repositories
|
8
|
+
# Repository for module execution records: lookups, aggregate metrics and
# the pending -> completed / failed lifecycle helpers.
class ModuleExecutionRepository < BaseRepository
  def initialize
    super(Models::ModuleExecution)
  end

  # All executions recorded for +module_name+.
  def find_by_module(module_name)
    dataset.where(module_name: module_name).all
  end

  # The +limit+ most recently started executions, newest first.
  def recent(limit = 10)
    newest_first = dataset.order(Sequel.desc(:started_at))
    newest_first.limit(limit).all
  end

  # All executions with the given status.
  def by_status(status)
    dataset.where(status: status).all
  end

  # Mean duration of completed executions that have a finish time,
  # optionally for one module. Returns nil when nothing qualifies.
  def average_duration(module_name = nil)
    scope = module_name ? dataset.where(module_name: module_name) : dataset
    finished = scope.where(status: 'completed').exclude(finished_at: nil).all
    return nil if finished.empty?

    durations = finished.map(&:duration).compact
    return nil if durations.empty?

    durations.sum.to_f / durations.size
  end

  # Percentage (two decimals) of executions whose status is 'completed',
  # optionally for one module. Returns 0.0 when there are no executions.
  def success_rate(module_name = nil)
    scope = module_name ? dataset.where(module_name: module_name) : dataset

    total = scope.count
    return 0.0 if total.zero?

    completed = scope.where(status: 'completed').count
    (completed.to_f / total * 100).round(2)
  end

  # Creates a pending execution that starts now.
  def create_for_module(module_name, inputs, api_request_id: nil)
    attributes = {
      module_name: module_name,
      inputs: inputs,
      status: 'pending',
      started_at: Time.now,
      api_request_id: api_request_id
    }
    create(attributes)
  end

  # Stores outputs and metadata and marks the execution completed now.
  # Returns the updated record, or nil when the id is unknown.
  def complete(id, outputs, metadata = {})
    changes = {
      outputs: outputs,
      metadata: metadata,
      status: 'completed',
      finished_at: Time.now
    }
    update(id, changes)
  end

  # Stores the error details and marks the execution failed now.
  # Returns the updated record, or nil when the id is unknown.
  #
  # Note: inside this class the name shadows Kernel#fail.
  def fail(id, error_message, error_backtrace = nil)
    changes = {
      error_message: error_message,
      error_backtrace: error_backtrace,
      status: 'failed',
      finished_at: Time.now
    }
    update(id, changes)
  end
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'base_repository'
|
4
|
+
|
5
|
+
module Desiru
|
6
|
+
module Persistence
|
7
|
+
module Repositories
|
8
|
+
# Repository for optimization result records: lookups by module and
# optimizer, best/most-improved queries and a creation helper.
class OptimizationResultRepository < BaseRepository
  def initialize
    super(Models::OptimizationResult)
  end

  # All results recorded for +module_name+.
  def find_by_module(module_name)
    dataset.where(module_name: module_name).all
  end

  # The highest-scoring result for +module_name+, or nil.
  def find_best_for_module(module_name)
    dataset
      .where(module_name: module_name)
      .order(Sequel.desc(:score))
      .first
  end

  # The +limit+ most recently created results, newest first.
  def recent(limit = 10)
    dataset
      .order(Sequel.desc(:created_at))
      .limit(limit)
      .all
  end

  # All results produced by the given optimizer type.
  def by_optimizer_type(type)
    dataset.where(optimizer_type: type).all
  end

  # Mean of OptimizationResult#improvement_percentage over results that
  # have a baseline, optionally for one module. Returns nil when no result
  # yields a percentage.
  def average_improvement(module_name = nil)
    scope = dataset.exclude(baseline_score: nil)
    scope = scope.where(module_name: module_name) if module_name

    # improvement_percentage is a model method, not a column, so the rows
    # must be loaded and mapped in Ruby; select_map would turn the block
    # into a SELECT of a column with that name.
    improvements = scope.all.map(&:improvement_percentage).compact

    return nil if improvements.empty?

    improvements.sum / improvements.length
  end

  # The +limit+ results with the largest relative gain over their baseline,
  # best first. Rows without a baseline, or with a baseline of zero (the
  # ratio is undefined there), are left out.
  def top_performers(limit = 5)
    dataset
      .exclude(baseline_score: nil)
      .exclude(baseline_score: 0)
      # Sequel.desc needs an expression argument; build the expression in a
      # virtual-row block and mark it descending there instead.
      .order { ((score - baseline_score) / baseline_score).desc }
      .limit(limit)
      .all
  end

  # Creates a result row. Extra keyword arguments are passed through as
  # attributes; +started_at+ defaults to the current time when it is
  # missing or nil.
  def create_result(module_name:, optimizer_type:, score:, **attributes)
    # The splat goes first so that the started_at default below cannot be
    # overwritten by an explicit nil in +attributes+.
    create(
      **attributes,
      module_name: module_name,
      optimizer_type: optimizer_type,
      score: score,
      started_at: attributes[:started_at] || Time.now
    )
  end
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'base_repository'
|
4
|
+
|
5
|
+
module Desiru
|
6
|
+
module Persistence
|
7
|
+
module Repositories
|
8
|
+
# Repository for training example records: lookups, usage tracking, bulk
# insertion, dataset splitting and export.
class TrainingExampleRepository < BaseRepository
  def initialize
    super(Models::TrainingExample)
  end

  # Examples for +module_name+, optionally limited to one +dataset_type+.
  def find_by_module(module_name, dataset_type: nil)
    scope = dataset.where(module_name: module_name)
    scope = scope.where(dataset_type: dataset_type) if dataset_type
    scope.all
  end

  # Up to +limit+ examples for the module whose +used_count+ is 0.
  # NOTE(review): rows with a NULL used_count are not matched here — confirm
  # whether the column has a default of 0.
  def find_unused(module_name, limit = 10)
    dataset
      .where(module_name: module_name, used_count: 0)
      .limit(limit)
      .all
  end

  # Up to +limit+ examples for the module, ordered by +used_count+ and then
  # +last_used_at+.
  def find_least_used(module_name, limit = 10)
    dataset
      .where(module_name: module_name)
      .order(:used_count, :last_used_at)
      .limit(limit)
      .all
  end

  # Increments the example's usage counter and stamps +last_used_at+.
  # Returns true when the record exists, false otherwise.
  def mark_as_used?(id)
    record = find(id)
    return false unless record

    record.update(
      # used_count may be nil (the model's used? predicate allows for it),
      # so treat a missing counter as zero instead of raising.
      used_count: record.used_count.to_i + 1,
      last_used_at: Time.now
    )
    true
  end

  # Creates one record per example inside a single transaction and returns
  # the created records. Each example is read through the :inputs, :outputs
  # and :metadata keys.
  def bulk_create(module_name, examples, dataset_type: 'training')
    transaction do
      examples.map do |example|
        create(
          module_name: module_name,
          dataset_type: dataset_type,
          inputs: example[:inputs],
          expected_outputs: example[:outputs],
          metadata: example[:metadata]
        )
      end
    end
  end

  # Randomly partitions the module's examples into training, validation and
  # test arrays. The training and validation sizes are the floored share of
  # the total; the test set receives whatever is left.
  #
  # Sizes are clamped to what is available, so every value in the returned
  # hash is an array (possibly empty) even when the ratios are negative or
  # add up to more than 1.
  def split_dataset(module_name, train_ratio: 0.8, val_ratio: 0.1)
    shuffled = find_by_module(module_name).shuffle
    total = shuffled.length

    train_size = (total * train_ratio).floor.clamp(0, total)
    val_size = (total * val_ratio).floor.clamp(0, total - train_size)

    {
      training: shuffled[0, train_size],
      validation: shuffled[train_size, val_size],
      test: shuffled[(train_size + val_size)..]
    }
  end

  # Exports the module's 'training' examples.
  #
  # format - :dspy returns an array of { inputs:, outputs: } hashes;
  #          :jsonl returns a string with one JSON object per line that also
  #          carries the metadata.
  #
  # Raises ArgumentError for any other format.
  def export_for_training(module_name, format: :dspy)
    examples = find_by_module(module_name, dataset_type: 'training')

    case format
    when :dspy
      examples.map do |ex|
        {
          inputs: ex.inputs,
          outputs: ex.expected_outputs
        }
      end
    when :jsonl
      examples.map do |ex|
        JSON.generate({
                        inputs: ex.inputs,
                        outputs: ex.expected_outputs,
                        metadata: ex.metadata
                      })
      end.join("\n")
    else
      raise ArgumentError, "Unknown format: #{format}"
    end
  end
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'repositories/base_repository'
|
4
|
+
require_relative 'repositories/module_execution_repository'
|
5
|
+
require_relative 'repositories/api_request_repository'
|
6
|
+
require_relative 'repositories/optimization_result_repository'
|
7
|
+
require_relative 'repositories/training_example_repository'
|
8
|
+
require_relative 'repositories/job_result_repository'
|
9
|
+
|
10
|
+
module Desiru
|
11
|
+
module Persistence
|
12
|
+
# Entry point that wires the repositories into the persistence layer.
module Repository
  # Instantiates each repository and registers it with Persistence under
  # its table-style name, in a fixed order. Returns the value of the last
  # registration call.
  def self.setup!
    {
      module_executions: Repositories::ModuleExecutionRepository,
      api_requests: Repositories::ApiRequestRepository,
      optimization_results: Repositories::OptimizationResultRepository,
      training_examples: Repositories::TrainingExampleRepository,
      job_results: Repositories::JobResultRepository
    }.map do |name, repository_class|
      Persistence.register_repository(name, repository_class.new)
    end.last
  end
end
|
28
|
+
end
|
29
|
+
end
|