ruby_llm-agents 0.5.0 → 1.0.0.beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +189 -31
- data/app/controllers/ruby_llm/agents/agents_controller.rb +136 -16
- data/app/controllers/ruby_llm/agents/dashboard_controller.rb +29 -9
- data/app/controllers/ruby_llm/agents/workflows_controller.rb +355 -0
- data/app/helpers/ruby_llm/agents/application_helper.rb +25 -0
- data/app/models/ruby_llm/agents/execution.rb +3 -0
- data/app/models/ruby_llm/agents/tenant_budget.rb +58 -15
- data/app/services/ruby_llm/agents/agent_registry.rb +51 -12
- data/app/views/layouts/ruby_llm/agents/application.html.erb +2 -29
- data/app/views/ruby_llm/agents/agents/_agent.html.erb +13 -1
- data/app/views/ruby_llm/agents/agents/_config_agent.html.erb +235 -0
- data/app/views/ruby_llm/agents/agents/_config_embedder.html.erb +70 -0
- data/app/views/ruby_llm/agents/agents/_config_image_generator.html.erb +152 -0
- data/app/views/ruby_llm/agents/agents/_config_moderator.html.erb +63 -0
- data/app/views/ruby_llm/agents/agents/_config_speaker.html.erb +108 -0
- data/app/views/ruby_llm/agents/agents/_config_transcriber.html.erb +91 -0
- data/app/views/ruby_llm/agents/agents/_workflow.html.erb +1 -1
- data/app/views/ruby_llm/agents/agents/index.html.erb +74 -9
- data/app/views/ruby_llm/agents/agents/show.html.erb +18 -378
- data/app/views/ruby_llm/agents/dashboard/_agent_comparison.html.erb +269 -15
- data/app/views/ruby_llm/agents/executions/show.html.erb +16 -0
- data/app/views/ruby_llm/agents/shared/_agent_type_badge.html.erb +93 -0
- data/app/views/ruby_llm/agents/workflows/_step_performance.html.erb +236 -0
- data/app/views/ruby_llm/agents/workflows/_structure_parallel.html.erb +76 -0
- data/app/views/ruby_llm/agents/workflows/_structure_pipeline.html.erb +74 -0
- data/app/views/ruby_llm/agents/workflows/_structure_router.html.erb +108 -0
- data/app/views/ruby_llm/agents/workflows/show.html.erb +442 -0
- data/config/routes.rb +1 -0
- data/lib/generators/ruby_llm_agents/agent_generator.rb +56 -7
- data/lib/generators/ruby_llm_agents/background_remover_generator.rb +110 -0
- data/lib/generators/ruby_llm_agents/embedder_generator.rb +107 -0
- data/lib/generators/ruby_llm_agents/image_analyzer_generator.rb +115 -0
- data/lib/generators/ruby_llm_agents/image_editor_generator.rb +108 -0
- data/lib/generators/ruby_llm_agents/image_generator_generator.rb +116 -0
- data/lib/generators/ruby_llm_agents/image_pipeline_generator.rb +178 -0
- data/lib/generators/ruby_llm_agents/image_transformer_generator.rb +109 -0
- data/lib/generators/ruby_llm_agents/image_upscaler_generator.rb +103 -0
- data/lib/generators/ruby_llm_agents/image_variator_generator.rb +102 -0
- data/lib/generators/ruby_llm_agents/install_generator.rb +76 -4
- data/lib/generators/ruby_llm_agents/restructure_generator.rb +292 -0
- data/lib/generators/ruby_llm_agents/speaker_generator.rb +121 -0
- data/lib/generators/ruby_llm_agents/templates/add_execution_type_migration.rb.tt +8 -0
- data/lib/generators/ruby_llm_agents/templates/agent.rb.tt +99 -84
- data/lib/generators/ruby_llm_agents/templates/application_agent.rb.tt +42 -40
- data/lib/generators/ruby_llm_agents/templates/application_background_remover.rb.tt +26 -0
- data/lib/generators/ruby_llm_agents/templates/application_embedder.rb.tt +50 -0
- data/lib/generators/ruby_llm_agents/templates/application_image_analyzer.rb.tt +26 -0
- data/lib/generators/ruby_llm_agents/templates/application_image_editor.rb.tt +20 -0
- data/lib/generators/ruby_llm_agents/templates/application_image_generator.rb.tt +38 -0
- data/lib/generators/ruby_llm_agents/templates/application_image_pipeline.rb.tt +139 -0
- data/lib/generators/ruby_llm_agents/templates/application_image_transformer.rb.tt +21 -0
- data/lib/generators/ruby_llm_agents/templates/application_image_upscaler.rb.tt +20 -0
- data/lib/generators/ruby_llm_agents/templates/application_image_variator.rb.tt +20 -0
- data/lib/generators/ruby_llm_agents/templates/application_speaker.rb.tt +49 -0
- data/lib/generators/ruby_llm_agents/templates/application_transcriber.rb.tt +53 -0
- data/lib/generators/ruby_llm_agents/templates/background_remover.rb.tt +44 -0
- data/lib/generators/ruby_llm_agents/templates/embedder.rb.tt +41 -0
- data/lib/generators/ruby_llm_agents/templates/image_analyzer.rb.tt +45 -0
- data/lib/generators/ruby_llm_agents/templates/image_editor.rb.tt +35 -0
- data/lib/generators/ruby_llm_agents/templates/image_generator.rb.tt +47 -0
- data/lib/generators/ruby_llm_agents/templates/image_pipeline.rb.tt +50 -0
- data/lib/generators/ruby_llm_agents/templates/image_transformer.rb.tt +44 -0
- data/lib/generators/ruby_llm_agents/templates/image_upscaler.rb.tt +38 -0
- data/lib/generators/ruby_llm_agents/templates/image_variator.rb.tt +33 -0
- data/lib/generators/ruby_llm_agents/templates/skills/AGENTS.md.tt +228 -0
- data/lib/generators/ruby_llm_agents/templates/skills/BACKGROUND_REMOVERS.md.tt +131 -0
- data/lib/generators/ruby_llm_agents/templates/skills/EMBEDDERS.md.tt +255 -0
- data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_ANALYZERS.md.tt +120 -0
- data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_EDITORS.md.tt +102 -0
- data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_GENERATORS.md.tt +282 -0
- data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_PIPELINES.md.tt +228 -0
- data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_TRANSFORMERS.md.tt +120 -0
- data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_UPSCALERS.md.tt +110 -0
- data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_VARIATORS.md.tt +120 -0
- data/lib/generators/ruby_llm_agents/templates/skills/SPEAKERS.md.tt +212 -0
- data/lib/generators/ruby_llm_agents/templates/skills/TOOLS.md.tt +227 -0
- data/lib/generators/ruby_llm_agents/templates/skills/TRANSCRIBERS.md.tt +251 -0
- data/lib/generators/ruby_llm_agents/templates/skills/WORKFLOWS.md.tt +300 -0
- data/lib/generators/ruby_llm_agents/templates/speaker.rb.tt +56 -0
- data/lib/generators/ruby_llm_agents/templates/transcriber.rb.tt +51 -0
- data/lib/generators/ruby_llm_agents/transcriber_generator.rb +107 -0
- data/lib/generators/ruby_llm_agents/upgrade_generator.rb +152 -1
- data/lib/ruby_llm/agents/audio/speaker.rb +553 -0
- data/lib/ruby_llm/agents/audio/transcriber.rb +669 -0
- data/lib/ruby_llm/agents/base_agent.rb +675 -0
- data/lib/ruby_llm/agents/core/base/moderation_dsl.rb +181 -0
- data/lib/ruby_llm/agents/core/base/moderation_execution.rb +274 -0
- data/lib/ruby_llm/agents/core/base.rb +135 -0
- data/lib/ruby_llm/agents/core/configuration.rb +981 -0
- data/lib/ruby_llm/agents/core/errors.rb +150 -0
- data/lib/ruby_llm/agents/{instrumentation.rb → core/instrumentation.rb} +22 -1
- data/lib/ruby_llm/agents/core/llm_tenant.rb +358 -0
- data/lib/ruby_llm/agents/{version.rb → core/version.rb} +1 -1
- data/lib/ruby_llm/agents/dsl/base.rb +110 -0
- data/lib/ruby_llm/agents/dsl/caching.rb +142 -0
- data/lib/ruby_llm/agents/dsl/reliability.rb +307 -0
- data/lib/ruby_llm/agents/dsl.rb +41 -0
- data/lib/ruby_llm/agents/image/analyzer/dsl.rb +130 -0
- data/lib/ruby_llm/agents/image/analyzer/execution.rb +402 -0
- data/lib/ruby_llm/agents/image/analyzer.rb +90 -0
- data/lib/ruby_llm/agents/image/background_remover/dsl.rb +154 -0
- data/lib/ruby_llm/agents/image/background_remover/execution.rb +240 -0
- data/lib/ruby_llm/agents/image/background_remover.rb +89 -0
- data/lib/ruby_llm/agents/image/concerns/image_operation_dsl.rb +91 -0
- data/lib/ruby_llm/agents/image/concerns/image_operation_execution.rb +165 -0
- data/lib/ruby_llm/agents/image/editor/dsl.rb +56 -0
- data/lib/ruby_llm/agents/image/editor/execution.rb +207 -0
- data/lib/ruby_llm/agents/image/editor.rb +92 -0
- data/lib/ruby_llm/agents/image/generator/active_storage_support.rb +127 -0
- data/lib/ruby_llm/agents/image/generator/content_policy.rb +95 -0
- data/lib/ruby_llm/agents/image/generator/pricing.rb +353 -0
- data/lib/ruby_llm/agents/image/generator/templates.rb +124 -0
- data/lib/ruby_llm/agents/image/generator.rb +455 -0
- data/lib/ruby_llm/agents/image/pipeline/dsl.rb +213 -0
- data/lib/ruby_llm/agents/image/pipeline/execution.rb +382 -0
- data/lib/ruby_llm/agents/image/pipeline.rb +97 -0
- data/lib/ruby_llm/agents/image/transformer/dsl.rb +148 -0
- data/lib/ruby_llm/agents/image/transformer/execution.rb +223 -0
- data/lib/ruby_llm/agents/image/transformer.rb +95 -0
- data/lib/ruby_llm/agents/image/upscaler/dsl.rb +83 -0
- data/lib/ruby_llm/agents/image/upscaler/execution.rb +219 -0
- data/lib/ruby_llm/agents/image/upscaler.rb +81 -0
- data/lib/ruby_llm/agents/image/variator/dsl.rb +62 -0
- data/lib/ruby_llm/agents/image/variator/execution.rb +189 -0
- data/lib/ruby_llm/agents/image/variator.rb +80 -0
- data/lib/ruby_llm/agents/{alert_manager.rb → infrastructure/alert_manager.rb} +17 -22
- data/lib/ruby_llm/agents/infrastructure/budget/budget_query.rb +145 -0
- data/lib/ruby_llm/agents/infrastructure/budget/config_resolver.rb +149 -0
- data/lib/ruby_llm/agents/infrastructure/budget/forecaster.rb +68 -0
- data/lib/ruby_llm/agents/infrastructure/budget/spend_recorder.rb +279 -0
- data/lib/ruby_llm/agents/infrastructure/budget_tracker.rb +275 -0
- data/lib/ruby_llm/agents/{execution_logger_job.rb → infrastructure/execution_logger_job.rb} +17 -1
- data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/executor.rb +2 -1
- data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/retry_strategy.rb +9 -3
- data/lib/ruby_llm/agents/{reliability.rb → infrastructure/reliability.rb} +11 -21
- data/lib/ruby_llm/agents/pipeline/builder.rb +215 -0
- data/lib/ruby_llm/agents/pipeline/context.rb +255 -0
- data/lib/ruby_llm/agents/pipeline/executor.rb +86 -0
- data/lib/ruby_llm/agents/pipeline/middleware/base.rb +124 -0
- data/lib/ruby_llm/agents/pipeline/middleware/budget.rb +95 -0
- data/lib/ruby_llm/agents/pipeline/middleware/cache.rb +171 -0
- data/lib/ruby_llm/agents/pipeline/middleware/instrumentation.rb +415 -0
- data/lib/ruby_llm/agents/pipeline/middleware/reliability.rb +276 -0
- data/lib/ruby_llm/agents/pipeline/middleware/tenant.rb +196 -0
- data/lib/ruby_llm/agents/pipeline.rb +68 -0
- data/lib/ruby_llm/agents/{engine.rb → rails/engine.rb} +79 -11
- data/lib/ruby_llm/agents/results/background_removal_result.rb +286 -0
- data/lib/ruby_llm/agents/{result.rb → results/base.rb} +73 -1
- data/lib/ruby_llm/agents/results/embedding_result.rb +243 -0
- data/lib/ruby_llm/agents/results/image_analysis_result.rb +314 -0
- data/lib/ruby_llm/agents/results/image_edit_result.rb +250 -0
- data/lib/ruby_llm/agents/results/image_generation_result.rb +346 -0
- data/lib/ruby_llm/agents/results/image_pipeline_result.rb +399 -0
- data/lib/ruby_llm/agents/results/image_transform_result.rb +251 -0
- data/lib/ruby_llm/agents/results/image_upscale_result.rb +255 -0
- data/lib/ruby_llm/agents/results/image_variation_result.rb +237 -0
- data/lib/ruby_llm/agents/results/moderation_result.rb +158 -0
- data/lib/ruby_llm/agents/results/speech_result.rb +338 -0
- data/lib/ruby_llm/agents/results/transcription_result.rb +408 -0
- data/lib/ruby_llm/agents/text/embedder.rb +444 -0
- data/lib/ruby_llm/agents/text/moderator.rb +237 -0
- data/lib/ruby_llm/agents/workflow/async.rb +220 -0
- data/lib/ruby_llm/agents/workflow/async_executor.rb +156 -0
- data/lib/ruby_llm/agents/{workflow.rb → workflow/orchestrator.rb} +6 -5
- data/lib/ruby_llm/agents/workflow/parallel.rb +34 -17
- data/lib/ruby_llm/agents/workflow/thread_pool.rb +185 -0
- data/lib/ruby_llm/agents.rb +86 -20
- metadata +172 -34
- data/lib/ruby_llm/agents/base/caching.rb +0 -40
- data/lib/ruby_llm/agents/base/cost_calculation.rb +0 -105
- data/lib/ruby_llm/agents/base/dsl.rb +0 -324
- data/lib/ruby_llm/agents/base/execution.rb +0 -366
- data/lib/ruby_llm/agents/base/reliability_dsl.rb +0 -82
- data/lib/ruby_llm/agents/base/reliability_execution.rb +0 -136
- data/lib/ruby_llm/agents/base/response_building.rb +0 -86
- data/lib/ruby_llm/agents/base/tool_tracking.rb +0 -57
- data/lib/ruby_llm/agents/base.rb +0 -210
- data/lib/ruby_llm/agents/budget_tracker.rb +0 -733
- data/lib/ruby_llm/agents/configuration.rb +0 -394
- /data/lib/ruby_llm/agents/{deprecations.rb → core/deprecations.rb} +0 -0
- /data/lib/ruby_llm/agents/{inflections.rb → core/inflections.rb} +0 -0
- /data/lib/ruby_llm/agents/{resolved_config.rb → core/resolved_config.rb} +0 -0
- /data/lib/ruby_llm/agents/{attempt_tracker.rb → infrastructure/attempt_tracker.rb} +0 -0
- /data/lib/ruby_llm/agents/{cache_helper.rb → infrastructure/cache_helper.rb} +0 -0
- /data/lib/ruby_llm/agents/{circuit_breaker.rb → infrastructure/circuit_breaker.rb} +0 -0
- /data/lib/ruby_llm/agents/{redactor.rb → infrastructure/redactor.rb} +0 -0
- /data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/breaker_manager.rb +0 -0
- /data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/execution_constraints.rb +0 -0
- /data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/fallback_routing.rb +0 -0
|
@@ -0,0 +1,408 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
  module Agents
    # Result object for transcription operations
    #
    # Wraps transcription output with metadata about the operation including
    # audio duration, timing, cost, and utility methods for output formatting.
    #
    # @example Basic transcription
    #   result = MeetingTranscriber.call(audio: "meeting.mp3")
    #   result.text            # => "Hello everyone..."
    #   result.audio_duration  # => 60.5
    #   result.total_cost      # => 0.006
    #
    # @example With segments
    #   result = SubtitleTranscriber.call(audio: "video.mp4")
    #   result.segments  # => [{ start: 0.0, end: 2.5, text: "Hello" }, ...]
    #   result.srt       # => "1\n00:00:00,000 --> 00:00:02,500\nHello\n\n..."
    #   result.vtt       # => "WEBVTT\n\n00:00:00.000 --> 00:00:02.500\nHello\n\n..."
    #
    # @example Speaker diarization
    #   result = InterviewTranscriber.call(audio: "interview.mp3")
    #   result.speakers          # => ["Interviewer", "Guest"]
    #   result.speaker_segments  # => { "Interviewer" => [...], "Guest" => [...] }
    #
    # @api public
    class TranscriptionResult
      # @!group Content

      # @!attribute [r] text
      #   @return [String, nil] The full transcription text
      attr_reader :text

      # @!attribute [r] segments
      #   @return [Array<Hash>, nil] Array of timed segments with :start, :end, :text keys
      attr_reader :segments

      # @!attribute [r] words
      #   @return [Array<Hash>, nil] Array of timed words (if word-level timestamps available)
      attr_reader :words

      # @!endgroup

      # @!group Speaker Diarization

      # @!attribute [r] speakers
      #   @return [Array<String>, nil] Identified speaker names/labels
      attr_reader :speakers

      # @!attribute [r] speaker_segments
      #   @return [Hash<String, Array>, nil] Segments grouped by speaker
      attr_reader :speaker_segments

      # @!endgroup

      # @!group Audio Metadata

      # @!attribute [r] audio_duration
      #   @return [Float, nil] Duration of audio in seconds
      attr_reader :audio_duration

      # @!attribute [r] audio_format
      #   @return [String, nil] Detected audio format (mp3, wav, etc.)
      attr_reader :audio_format

      # @!attribute [r] audio_channels
      #   @return [Integer, nil] Number of audio channels (1=mono, 2=stereo)
      attr_reader :audio_channels

      # @!attribute [r] audio_sample_rate
      #   @return [Integer, nil] Sample rate in Hz
      attr_reader :audio_sample_rate

      # @!endgroup

      # @!group Language

      # @!attribute [r] language
      #   @return [String, nil] Language code (ISO 639-1) that was requested
      attr_reader :language

      # @!attribute [r] detected_language
      #   @return [String, nil] Auto-detected language code
      attr_reader :detected_language

      # @!attribute [r] language_confidence
      #   @return [Float, nil] Confidence score for language detection (0.0-1.0)
      attr_reader :language_confidence

      # @!endgroup

      # @!group Model Info

      # @!attribute [r] model_id
      #   @return [String, nil] The transcription model used
      attr_reader :model_id

      # @!endgroup

      # @!group Timing

      # @!attribute [r] duration_ms
      #   @return [Integer, nil] Execution duration in milliseconds
      attr_reader :duration_ms

      # @!attribute [r] started_at
      #   @return [Time, nil] When execution started
      attr_reader :started_at

      # @!attribute [r] completed_at
      #   @return [Time, nil] When execution completed
      attr_reader :completed_at

      # @!endgroup

      # @!group Cost & Usage

      # @!attribute [r] total_cost
      #   @return [Float, nil] Total cost in USD
      attr_reader :total_cost

      # @!attribute [r] audio_minutes
      #   @return [Float, nil] Billable audio minutes
      attr_reader :audio_minutes

      # @!endgroup

      # @!group Quality

      # @!attribute [r] confidence
      #   @return [Float, nil] Overall confidence score (0.0-1.0)
      attr_reader :confidence

      # @!endgroup

      # @!group Status

      # @!attribute [r] status
      #   @return [Symbol] Status (:success, :partial, :failed)
      attr_reader :status

      # @!attribute [r] chunks
      #   @return [Array<TranscriptionResult>, nil] Individual chunk results for long audio
      attr_reader :chunks

      # @!endgroup

      # @!group Multi-tenancy

      # @!attribute [r] tenant_id
      #   @return [String, nil] Tenant identifier if multi-tenancy enabled
      attr_reader :tenant_id

      # @!endgroup

      # @!group Error

      # @!attribute [r] error_class
      #   @return [String, nil] Exception class name if failed
      attr_reader :error_class

      # @!attribute [r] error_message
      #   @return [String, nil] Exception message if failed
      attr_reader :error_message

      # @!endgroup

      # Creates a new TranscriptionResult instance
      #
      # Attributes are read with symbol keys. When :audio_minutes is not
      # supplied it is derived from :audio_duration (seconds / 60); when
      # :status is not supplied it defaults to :success.
      #
      # @param attributes [Hash] Result attributes
      # @option attributes [String] :text The transcription text
      # @option attributes [Array<Hash>] :segments Timed segments
      # @option attributes [Array<Hash>] :words Timed words
      # @option attributes [Array<String>] :speakers Speaker names
      # @option attributes [Hash] :speaker_segments Segments by speaker
      # @option attributes [Float] :audio_duration Audio duration in seconds
      # @option attributes [String] :audio_format Audio format
      # @option attributes [Integer] :audio_channels Number of channels
      # @option attributes [Integer] :audio_sample_rate Sample rate in Hz
      # @option attributes [String] :language Requested language
      # @option attributes [String] :detected_language Detected language
      # @option attributes [Float] :language_confidence Language confidence
      # @option attributes [String] :model_id Model used
      # @option attributes [Integer] :duration_ms Execution duration
      # @option attributes [Time] :started_at Start time
      # @option attributes [Time] :completed_at Completion time
      # @option attributes [Float] :total_cost Cost in USD
      # @option attributes [Float] :audio_minutes Billable minutes
      # @option attributes [Float] :confidence Overall confidence
      # @option attributes [Symbol] :status Status
      # @option attributes [Array] :chunks Chunk results
      # @option attributes [String] :tenant_id Tenant identifier
      # @option attributes [String] :error_class Error class
      # @option attributes [String] :error_message Error message
      def initialize(attributes = {})
        # Content
        @text = attributes[:text]
        @segments = attributes[:segments]
        @words = attributes[:words]

        # Speaker diarization
        @speakers = attributes[:speakers]
        @speaker_segments = attributes[:speaker_segments]

        # Audio metadata
        @audio_duration = attributes[:audio_duration]
        @audio_format = attributes[:audio_format]
        @audio_channels = attributes[:audio_channels]
        @audio_sample_rate = attributes[:audio_sample_rate]

        # Language
        @language = attributes[:language]
        @detected_language = attributes[:detected_language]
        @language_confidence = attributes[:language_confidence]

        # Model info
        @model_id = attributes[:model_id]

        # Timing
        @duration_ms = attributes[:duration_ms]
        @started_at = attributes[:started_at]
        @completed_at = attributes[:completed_at]

        # Cost & usage (billable minutes fall back to the audio length)
        @total_cost = attributes[:total_cost]
        @audio_minutes = attributes[:audio_minutes] || (audio_duration ? audio_duration / 60.0 : nil)

        # Quality
        @confidence = attributes[:confidence]

        # Status
        @status = attributes[:status] || :success
        @chunks = attributes[:chunks]

        # Multi-tenancy
        @tenant_id = attributes[:tenant_id]

        # Error
        @error_class = attributes[:error_class]
        @error_message = attributes[:error_message]
      end

      # Returns whether the transcription succeeded
      #
      # @return [Boolean] true when no error class is recorded and status is :success
      def success?
        error_class.nil? && status == :success
      end

      # Returns whether the transcription did not fully succeed
      #
      # This is the exact negation of {#success?}, so it is also true for a
      # :partial or :failed status even when no error class is recorded.
      #
      # @return [Boolean] true whenever {#success?} is false
      def error?
        !success?
      end

      # Returns whether partial results are available
      #
      # @return [Boolean] true if status is :partial
      def partial?
        status == :partial
      end

      # Returns whether speaker diarization data is available
      #
      # @return [Boolean] true if speakers were identified
      def diarized?
        filled?(speakers) && speakers.any?
      end

      # Returns the transcription as SRT subtitle format
      #
      # Cues are numbered from 1 and separated by a blank line; segment text
      # is stripped of surrounding whitespace.
      #
      # @return [String, nil] SRT formatted subtitles, or nil without segments
      def srt
        return nil unless filled?(segments)

        segments.each_with_index.map do |segment, index|
          start_time = format_srt_time(segment[:start])
          end_time = format_srt_time(segment[:end])
          text_content = segment[:text]&.strip

          "#{index + 1}\n#{start_time} --> #{end_time}\n#{text_content}\n"
        end.join("\n")
      end

      # Returns the transcription as WebVTT subtitle format
      #
      # @return [String, nil] VTT formatted subtitles, or nil without segments
      def vtt
        return nil unless filled?(segments)

        lines = ["WEBVTT", ""]
        segments.each do |segment|
          start_time = format_vtt_time(segment[:start])
          end_time = format_vtt_time(segment[:end])
          text_content = segment[:text]&.strip

          lines << "#{start_time} --> #{end_time}"
          lines << text_content
          lines << ""
        end

        lines.join("\n")
      end

      # Returns calculated words per minute
      #
      # @return [Float, nil] Words per minute or nil if not calculable
      def words_per_minute
        return nil unless filled?(text) && audio_duration && audio_duration > 0

        # String#split without a pattern ignores leading whitespace, so text
        # such as " hello world" counts two words instead of three.
        word_count = text.split.size
        (word_count / (audio_duration / 60.0)).round(1)
      end

      # Returns the segment at a specific timestamp
      #
      # Segments without a :start or :end value are skipped.
      #
      # @param timestamp [Float] Time in seconds
      # @return [Hash, nil] The first segment containing the timestamp (bounds inclusive)
      def segment_at(timestamp)
        return nil unless filled?(segments)

        segments.find do |segment|
          first = segment[:start]
          last = segment[:end]
          first && last && timestamp >= first && timestamp <= last
        end
      end

      # Returns text between two timestamps
      #
      # Only segments lying entirely inside the range are included; segments
      # without a :start or :end value are skipped.
      #
      # @param start_time [Float] Start time in seconds
      # @param end_time [Float] End time in seconds
      # @return [String, nil] Segment texts joined by a space ("" when nothing
      #   matches), or nil without segments
      def text_between(start_time, end_time)
        return nil unless filled?(segments)

        matching = segments.select do |segment|
          first = segment[:start]
          last = segment[:end]
          first && last && first >= start_time && last <= end_time
        end

        matching.map { |s| s[:text] }.join(" ")
      end

      # Converts the result to a hash
      #
      # @note The :chunks attribute is not part of the returned hash.
      #
      # @return [Hash] Result data keyed by attribute name
      def to_h
        {
          text: text,
          segments: segments,
          words: words,
          speakers: speakers,
          speaker_segments: speaker_segments,
          audio_duration: audio_duration,
          audio_format: audio_format,
          audio_channels: audio_channels,
          audio_sample_rate: audio_sample_rate,
          language: language,
          detected_language: detected_language,
          language_confidence: language_confidence,
          model_id: model_id,
          duration_ms: duration_ms,
          started_at: started_at,
          completed_at: completed_at,
          total_cost: total_cost,
          audio_minutes: audio_minutes,
          confidence: confidence,
          status: status,
          tenant_id: tenant_id,
          error_class: error_class,
          error_message: error_message
        }
      end

      private

      # Plain-Ruby presence check so the class works without ActiveSupport:
      # nil/false, whitespace-only strings and empty collections are not filled.
      #
      # @param value [Object] Value to inspect
      # @return [Boolean] true when the value carries content
      def filled?(value)
        return false unless value
        return !value.match?(/\A[[:space:]]*\z/) if value.is_a?(String)

        value.respond_to?(:empty?) ? !value.empty? : true
      end

      # Formats time for SRT format (HH:MM:SS,mmm)
      #
      # @param seconds [Float, nil] Time in seconds
      # @return [String] SRT formatted time
      def format_srt_time(seconds)
        format_timestamp(seconds, ",")
      end

      # Formats time for VTT format (HH:MM:SS.mmm)
      #
      # @param seconds [Float, nil] Time in seconds
      # @return [String] VTT formatted time
      def format_vtt_time(seconds)
        format_timestamp(seconds, ".")
      end

      # Builds an HH:MM:SS<separator>mmm timestamp.
      #
      # The value is converted to whole milliseconds once (rounded, not
      # truncated) and then split with integer arithmetic. This avoids float
      # artefacts such as 2.3 s rendering as ",299" and carries correctly
      # when the fraction rounds up to a full second. A nil time renders as
      # zero; negative times are clamped to zero.
      #
      # @param seconds [Numeric, nil] Time in seconds
      # @param separator [String] Character between seconds and milliseconds
      # @return [String] Formatted timestamp
      def format_timestamp(seconds, separator)
        total_millis = seconds ? [(seconds * 1000).round, 0].max : 0
        total_seconds, millis = total_millis.divmod(1000)
        total_minutes, secs = total_seconds.divmod(60)
        hours, minutes = total_minutes.divmod(60)

        format("%02d:%02d:%02d%s%03d", hours, minutes, secs, separator, millis)
      end
    end
  end
end
|