RubyGems - sorbet-baml - Versions diffs - 0.1.0 → 0.3.0 - Mend

sorbet-baml 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (83)

checksums.yaml +4 -4
data/CLAUDE.md +94 -0
data/README.md +315 -122
data/Rakefile +2 -2
data/docs-site/.gitignore +48 -0
data/docs-site/Gemfile +5 -0
data/docs-site/Gemfile.lock +140 -0
data/docs-site/Rakefile +3 -0
data/docs-site/bridgetown.config.yml +15 -0
data/docs-site/config/initializers.rb +9 -0
data/docs-site/config/puma.rb +9 -0
data/docs-site/config.ru +5 -0
data/docs-site/esbuild.config.js +11 -0
data/docs-site/frontend/javascript/index.js +22 -0
data/docs-site/frontend/styles/index.css +61 -0
data/docs-site/package.json +18 -0
data/docs-site/postcss.config.js +6 -0
data/docs-site/server/roda_app.rb +9 -0
data/docs-site/src/_components/head.liquid +26 -0
data/docs-site/src/_components/nav.liquid +68 -0
data/docs-site/src/_layouts/default.liquid +27 -0
data/docs-site/src/_layouts/doc.liquid +39 -0
data/docs-site/src/advanced-usage.md +598 -0
data/docs-site/src/getting-started.md +170 -0
data/docs-site/src/index.md +183 -0
data/docs-site/src/troubleshooting.md +317 -0
data/docs-site/src/type-mapping.md +236 -0
data/docs-site/tailwind.config.js +85 -0
data/examples/description_parameters.rb +49 -0
data/lib/sorbet_baml/comment_extractor.rb +51 -54
data/lib/sorbet_baml/converter.rb +69 -35
data/lib/sorbet_baml/dependency_resolver.rb +11 -11
data/lib/sorbet_baml/description_extension.rb +34 -0
data/lib/sorbet_baml/description_extractor.rb +34 -0
data/lib/sorbet_baml/dspy_tool_converter.rb +97 -0
data/lib/sorbet_baml/dspy_tool_extensions.rb +23 -0
data/lib/sorbet_baml/enum_extensions.rb +2 -2
data/lib/sorbet_baml/struct_extensions.rb +2 -2
data/lib/sorbet_baml/tool_extensions.rb +23 -0
data/lib/sorbet_baml/type_mapper.rb +35 -37
data/lib/sorbet_baml/version.rb +1 -1
data/lib/sorbet_baml.rb +41 -10
data/sorbet/config +2 -0
data/sorbet/rbi/gems/anthropic@1.5.0.rbi +21252 -0
data/sorbet/rbi/gems/async@2.27.3.rbi +9 -0
data/sorbet/rbi/gems/bigdecimal@3.2.2.rbi +9 -0
data/sorbet/rbi/gems/concurrent-ruby@1.3.5.rbi +424 -0
data/sorbet/rbi/gems/connection_pool@2.5.3.rbi +9 -0
data/sorbet/rbi/gems/console@1.33.0.rbi +9 -0
data/sorbet/rbi/gems/dry-configurable@1.3.0.rbi +672 -0
data/sorbet/rbi/gems/dry-core@1.1.0.rbi +1729 -0
data/sorbet/rbi/gems/dry-logger@1.1.0.rbi +1317 -0
data/sorbet/rbi/gems/dspy@0.19.1.rbi +6677 -0
data/sorbet/rbi/gems/ffi@1.17.2.rbi +2174 -0
data/sorbet/rbi/gems/fiber-annotation@0.2.0.rbi +9 -0
data/sorbet/rbi/gems/fiber-local@1.1.0.rbi +9 -0
data/sorbet/rbi/gems/fiber-storage@1.0.1.rbi +9 -0
data/sorbet/rbi/gems/google-protobuf@4.32.0.rbi +9 -0
data/sorbet/rbi/gems/googleapis-common-protos-types@1.20.0.rbi +9 -0
data/sorbet/rbi/gems/informers@1.2.1.rbi +1875 -0
data/sorbet/rbi/gems/io-event@1.12.1.rbi +9 -0
data/sorbet/rbi/gems/metrics@0.13.0.rbi +9 -0
data/sorbet/rbi/gems/onnxruntime@0.10.0.rbi +304 -0
data/sorbet/rbi/gems/openai@0.16.0.rbi +68055 -0
data/sorbet/rbi/gems/opentelemetry-api@1.6.0.rbi +9 -0
data/sorbet/rbi/gems/opentelemetry-common@0.22.0.rbi +9 -0
data/sorbet/rbi/gems/opentelemetry-exporter-otlp@0.30.0.rbi +9 -0
data/sorbet/rbi/gems/opentelemetry-registry@0.4.0.rbi +9 -0
data/sorbet/rbi/gems/opentelemetry-sdk@1.8.1.rbi +9 -0
data/sorbet/rbi/gems/opentelemetry-semantic_conventions@1.11.0.rbi +9 -0
data/sorbet/rbi/gems/polars-df@0.20.0.rbi +9 -0
data/sorbet/rbi/gems/sorbet-result@1.4.0.rbi +242 -0
data/sorbet/rbi/gems/sorbet-schema@0.9.2.rbi +743 -0
data/sorbet/rbi/gems/sorbet-struct-comparable@1.3.0.rbi +48 -0
data/sorbet/rbi/gems/tokenizers@0.5.5.rbi +754 -0
data/sorbet/rbi/gems/traces@0.17.0.rbi +9 -0
data/sorbet/rbi/gems/zeitwerk@2.7.3.rbi +1429 -0
metadata +67 -7
data/docs/README.md +0 -117
data/docs/advanced-usage.md +0 -427
data/docs/getting-started.md +0 -91
data/docs/troubleshooting.md +0 -291
data/docs/type-mapping.md +0 -192

data/docs-site/src/type-mapping.md ADDED

@@ -0,0 +1,236 @@
+---
+layout: doc
+title: "Type Mapping Reference"
+description: "Complete mapping between Sorbet types and BAML output. Learn how every Sorbet type converts to efficient BAML format."
+---
+# Type Mapping Reference
+Complete mapping between Sorbet types and BAML output for autonomous LLM workflows. All listed types are **fully supported** with automatic field descriptions.
+## Basic Types
+| Sorbet Type | BAML Output | Example |
+|-------------|-------------|---------|
+| `String` | `string` | `name string` |
+| `Integer` | `int` | `age int` |
+| `Float` | `float` | `price float` |
+| `T::Boolean` | `bool` | `active bool` |
+| `NilClass` | `null` | `null` |
+| `Symbol` | `string` | `status string` |
+| `Date/DateTime/Time` | `string` | `created_at string` |
+## Optional Types (T.nilable)
+| Sorbet Type | BAML Output | Example |
+|-------------|-------------|---------|
+| `T.nilable(String)` | `string?` | `email string?` |
+| `T.nilable(Integer)` | `int?` | `age int?` |
+| `T.nilable(MyStruct)` | `MyStruct?` | `address Address?` |
+## Collection Types
+| Sorbet Type | BAML Output | Example |
+|-------------|-------------|---------|
+| `T::Array[String]` | `string[]` | `tags string[]` |
+| `T::Array[Integer]` | `int[]` | `scores int[]` |
+| `T::Array[MyStruct]` | `MyStruct[]` | `addresses Address[]` |
+## Hash/Map Types
+| Sorbet Type | BAML Output | Example |
+|-------------|-------------|---------|
+| `T::Hash[String, String]` | `map<string, string>` | `metadata map<string, string>` |
+| `T::Hash[String, Integer]` | `map<string, int>` | `counts map<string, int>` |
+| `T::Hash[Symbol, String]` | `map<string, string>` | `config map<string, string>` |
+## Union Types (T.any)
+| Sorbet Type | BAML Output | Example |
+|-------------|-------------|---------|
+| `T.any(String, Integer)` | `string \| int` | `value string \| int` |
+| `T.any(String, Integer, Float)` | `string \| int \| float` | `mixed string \| int \| float` |
+| `T.nilable(T.any(String, Integer))` | `(string \| int)?` | `optional (string \| int)?` |
+## Complex Collection Types
+| Sorbet Type | BAML Output | Example |
+|-------------|-------------|---------|
+| `T::Array[T.any(String, Integer)]` | `(string \| int)[]` | `mixed_array (string \| int)[]` |
+| `T::Hash[String, T.any(String, Integer)]` | `map<string, string \| int>` | `settings map<string, string \| int>` |
+| `T::Hash[String, T::Array[String]]` | `map<string, string[]>` | `labels map<string, string[]>` |
+## Structured Types
+### T::Struct to BAML Classes (Research Workflow Example)
+```ruby
+class ConfidenceLevel < T::Enum
+  enums do
+    # Low confidence, requires further verification
+    Low = new('low')
+    # High confidence, strongly supported by evidence
+    High = new('high')
+  end
+end
+class ResearchFindings < T::Struct
+  # Detailed research findings and analysis
+  const :findings, String
+  # Key actionable insights extracted
+  const :key_insights, T::Array[String]
+  # Assessment of evidence quality
+  const :evidence_quality, ConfidenceLevel
+  # Confidence score (1-10 scale)
+  const :confidence_score, Integer
+end
+```
+```ruby
+ResearchFindings.to_baml
+```
+**Generated BAML:**
+```baml
+enum ConfidenceLevel {
+  "low" @description("Low confidence, requires further verification")
+  "high" @description("High confidence, strongly supported by evidence")
+}
+class ResearchFindings {
+  findings string @description("Detailed research findings and analysis")
+  key_insights string[] @description("Key actionable insights extracted")
+  evidence_quality ConfidenceLevel @description("Assessment of evidence quality")
+  confidence_score int @description("Confidence score (1-10 scale)")
+}
+```
+### T::Enum to BAML Enums (Task Classification)
+```ruby
+class TaskType < T::Enum
+  enums do
+    # Literature review and information gathering
+    Research = new('research')
+    # Data analysis and statistical interpretation
+    Analysis = new('analysis')
+    # Combining multiple sources into coherent insights
+    Synthesis = new('synthesis')
+  end
+end
+class ResearchTask < T::Struct
+  # Clear description of the research objective
+  const :objective, String
+  # Type of research task to be performed
+  const :task_type, TaskType
+end
+```
+```ruby
+[TaskType, ResearchTask].map(&:to_baml).join("\n\n")
+```
+**Generated BAML:**
+```baml
+enum TaskType {
+  "research" @description("Literature review and information gathering")
+  "analysis" @description("Data analysis and statistical interpretation")
+  "synthesis" @description("Combining multiple sources into coherent insights")
+}
+class ResearchTask {
+  objective string @description("Clear description of the research objective")
+  task_type TaskType @description("Type of research task to be performed")
+}
+```
+### Complex Real-World Example (Autonomous Research Agent)
+```ruby
+class ComplexityLevel < T::Enum
+  enums do
+    # Basic analysis requiring straightforward research
+    Basic = new('basic')
+    # Advanced analysis requiring deep domain expertise
+    Advanced = new('advanced')
+  end
+end
+class TaskDecomposition < T::Struct
+  # The main research topic being investigated
+  const :research_topic, String
+  # Target complexity level for the decomposition
+  const :complexity_level, ComplexityLevel
+  # Autonomously generated list of research subtasks
+  const :subtasks, T::Array[String]
+  # Strategic priority rankings (1-5 scale) for each subtask
+  const :priority_order, T::Array[Integer]
+  # Task dependency relationships for optimal sequencing
+  const :dependencies, T::Array[String]
+  # Key-value metadata for agent coordination
+  const :agent_metadata, T::Hash[String, T.any(String, Integer)]
+end
+```
+```ruby
+[ComplexityLevel, TaskDecomposition].map(&:to_baml).join("\n\n")
+```
+**Generated BAML:**
+```baml
+enum ComplexityLevel {
+  "basic" @description("Basic analysis requiring straightforward research")
+  "advanced" @description("Advanced analysis requiring deep domain expertise")
+}
+class TaskDecomposition {
+  research_topic string @description("The main research topic being investigated")
+  complexity_level ComplexityLevel @description("Target complexity level for the decomposition")
+  subtasks string[] @description("Autonomously generated list of research subtasks")
+  priority_order int[] @description("Strategic priority rankings (1-5 scale) for each subtask")
+  dependencies string[] @description("Task dependency relationships for optimal sequencing")
+  agent_metadata map<string, string | int> @description("Key-value metadata for agent coordination")
+}
+```
+## Advanced Features
+### Dependency Management
+```ruby
+# Dependencies automatically included with smart defaults
+TaskDecomposition.to_baml
+# Outputs ComplexityLevel enum, then TaskDecomposition class in correct order
+```
+### Field Descriptions (Included by Default)
+```ruby
+# Smart defaults extract field descriptions from comments
+TaskDecomposition.to_baml
+# Outputs BAML with @description() annotations for LLM context
+```
+### Custom Formatting
+```ruby
+# Smart defaults include dependencies and descriptions automatically
+TaskDecomposition.to_baml(indent_size: 4)
+# Disable features if needed:
+TaskDecomposition.to_baml(include_descriptions: false)
+```
+## ✅ Completed Features
+- ✅ `T::Struct` → `class Name { ... }` with field descriptions
+- ✅ `T::Enum` → `enum Name { ... }` with value descriptions
+- ✅ Automatic dependency resolution and ordering
+- ✅ Smart defaults (descriptions and dependencies enabled)
+- ✅ Full type safety with Sorbet type checking
+## Future Enhancements (Optional)
+- `T.type_alias` → `type Name = ...`
+- Custom naming strategies (snake_case ↔ camelCase)
+- Self-referential type handling

data/docs-site/tailwind.config.js ADDED

@@ -0,0 +1,85 @@
+/** @type {import('tailwindcss').Config} */
+module.exports = {
+  content: [
+    "./src/**/*.{html,md,liquid,erb}",
+    "./frontend/**/*.js",
+  ],
+  darkMode: 'media',
+  theme: {
+    extend: {
+      typography: {
+        DEFAULT: {
+          css: {
+            maxWidth: 'none',
+            color: 'inherit',
+            a: {
+              color: '#3b82f6',
+              textDecoration: 'underline',
+              fontWeight: '500',
+            },
+            '[class~="lead"]': {
+              color: 'inherit',
+            },
+            strong: {
+              color: 'inherit',
+            },
+            'ol > li::before': {
+              color: 'inherit',
+            },
+            'ul > li::before': {
+              backgroundColor: 'currentColor',
+            },
+            hr: {
+              borderColor: 'currentColor',
+              opacity: 0.3,
+            },
+            blockquote: {
+              color: 'inherit',
+              borderLeftColor: 'currentColor',
+              opacity: 0.8,
+            },
+            h1: {
+              color: 'inherit',
+            },
+            h2: {
+              color: 'inherit',
+            },
+            h3: {
+              color: 'inherit',
+            },
+            h4: {
+              color: 'inherit',
+            },
+            'figure figcaption': {
+              color: 'inherit',
+            },
+            code: {
+              color: 'inherit',
+              backgroundColor: 'rgba(156, 163, 175, 0.2)',
+              padding: '0.125rem 0.25rem',
+              borderRadius: '0.25rem',
+              fontWeight: '400',
+            },
+            'code::before': {
+              content: '""',
+            },
+            'code::after': {
+              content: '""',
+            },
+            pre: {
+              backgroundColor: '#1f2937',
+              color: '#f9fafb',
+            },
+            'pre code': {
+              backgroundColor: 'transparent',
+              color: 'inherit',
+            },
+          },
+        },
+      },
+    },
+  },
+  plugins: [
+    require('@tailwindcss/typography'),
+  ],
+}

data/examples/description_parameters.rb ADDED

@@ -0,0 +1,49 @@
+#!/usr/bin/env ruby
+# typed: false
+require_relative '../lib/sorbet_baml'
+puts '🎯 Description Parameter Support Demo'
+puts '=' * 50
+# Example 1: Basic description parameters
+class User < T::Struct
+  const :name, String, description: "User's full legal name"
+  prop :age, Integer, description: 'Age in years'
+  const :email, T.nilable(String), description: 'Optional email address for notifications'
+  const :interests, T::Array[String], description: 'List of user hobbies and interests'
+end
+puts "\n1. Basic T::Struct with description parameters:"
+puts User.to_baml
+# Example 2: Mixed description sources (parameters + comments)
+class Product < T::Struct
+  # This comment will be used as fallback
+  const :id, String
+  const :name, String, description: 'Product name for display'
+  # Price in USD cents
+  prop :price_cents, Integer
+  const :category, String, description: 'Product category classification'
+end
+puts "\n2. Mixed description sources (parameters take priority):"
+puts Product.to_baml
+# Example 3: Complex nested types with descriptions
+class Order < T::Struct
+  const :id, String, description: 'Unique order identifier'
+  const :customer, User, description: 'Customer who placed the order'
+  const :items, T::Array[Product], description: 'List of ordered products'
+  const :total_cents, Integer, description: 'Total order value in USD cents'
+  const :status, String, description: 'Current order processing status'
+end
+puts "\n3. Complex nested types with dependencies:"
+puts Order.to_baml
+puts "\n✨ Beautiful, readable, and LLM-friendly!"
+puts '🚀 Perfect for DSPy.rb, autonomous agents, and structured LLM outputs'

data/lib/sorbet_baml/comment_extractor.rb CHANGED

@@ -8,101 +8,100 @@ module SorbetBaml
     sig { params(klass: T.class_of(T::Struct)).returns(T::Hash[String, T.nilable(String)]) }
     def self.extract_field_comments(klass)
+      # First try to get descriptions from the description extractor (extra field)
+      descriptions = DescriptionExtractor.extract_prop_descriptions(klass)
+      # Then fall back to comment-based extraction for any missing descriptions
       comments = {}
       source_file = find_source_file(klass)
-      return comments unless source_file && File.exist?(source_file)
-      lines = File.readlines(source_file)
-      extract_comments_from_lines(lines, klass.name.split('::').last, comments)
-      comments
+      if source_file && File.exist?(source_file)
+        lines = File.readlines(source_file)
+        extract_comments_from_lines(lines, T.must(T.must(klass.name).split('::').last), comments)
+      end
+      # Merge with priority: description parameters > comments
+      descriptions.merge(comments) { |_key, desc_param, _comment| desc_param }
     end
     sig { params(klass: T.class_of(T::Enum)).returns(T::Hash[String, T.nilable(String)]) }
     def self.extract_enum_comments(klass)
       comments = {}
       source_file = find_source_file(klass)
       return comments unless source_file && File.exist?(source_file)
       lines = File.readlines(source_file)
-      extract_enum_comments_from_lines(lines, klass.name.split('::').last, comments)
+      extract_enum_comments_from_lines(lines, T.must(T.must(klass.name).split('::').last), comments)
       comments
     end
-    private
-    sig { params(klass: Class).returns(T.nilable(String)) }
+    sig { params(klass: T::Class[T.anything]).returns(T.nilable(String)) }
     def self.find_source_file(klass)
       # Try to find where the class was defined
       # This is a heuristic approach since Ruby doesn't provide reliable source location for classes
       # Method 1: Check if any methods have source location
       begin
         if klass.respond_to?(:new) && klass.method(:new).respond_to?(:source_location)
           location = klass.method(:new).source_location
           return location[0] if location
         end
-      rescue
+      rescue StandardError
         # Ignore errors
       end
       # Method 2: Look at the current call stack for files that might contain the class
       caller_locations.each do |location|
         file_path = location.absolute_path || location.path
         next unless file_path && File.exist?(file_path)
         # Read the file and check if it contains the class definition
         begin
           content = File.read(file_path)
-          class_name = klass.name.split('::').last
-          if content.match(/class\s+#{Regexp.escape(class_name)}\s*</)
-            return file_path
-          end
-        rescue
+          class_name = T.must(klass.name).split('::').last
+          return file_path if content.match(/class\s+#{Regexp.escape(T.must(class_name))}\s*</)
+        rescue StandardError
           # Ignore file read errors
         end
       end
       nil
     end
     sig { params(lines: T::Array[String], class_name: String, comments: T::Hash[String, T.nilable(String)]).void }
     def self.extract_comments_from_lines(lines, class_name, comments)
-      in_target_class = false
+      in_target_class = T.let(false, T::Boolean)
       current_comment = T.let(nil, T.nilable(String))
       brace_depth = 0
       lines.each do |line|
         stripped = line.strip
         # Check if we're entering the target class
         if stripped.match(/^class\s+#{Regexp.escape(class_name)}\s*<\s*T::Struct/)
           in_target_class = true
           brace_depth = 0
           next
         end
         next unless in_target_class
         # Track brace depth to handle nested classes
         brace_depth += stripped.count('{')
         brace_depth -= stripped.count('}')
         # Exit when we reach the end of the class
-        if stripped == 'end' && brace_depth == 0
-          break
-        end
+        break if stripped == 'end' && brace_depth == 0
         # Extract comment
         if stripped.start_with?('#')
-          comment_text = stripped[1..-1].strip
+          comment_text = T.must(stripped[1..-1]).strip
           current_comment = current_comment ? "#{current_comment} #{comment_text}" : comment_text
         elsif stripped.match(/^const\s+:(\w+)/) && current_comment
-          field_name = stripped.match(/^const\s+:(\w+)/)[1]
-          comments[field_name] = current_comment
+          field_name = T.must(stripped.match(/^const\s+:(\w+)/))[1]
+          comments[T.must(field_name)] = current_comment
           current_comment = nil
         elsif !stripped.empty? && !stripped.start_with?('#')
           # Reset comment if we hit non-comment, non-const line
@@ -113,47 +112,45 @@ module SorbetBaml
     sig { params(lines: T::Array[String], class_name: String, comments: T::Hash[String, T.nilable(String)]).void }
     def self.extract_enum_comments_from_lines(lines, class_name, comments)
-      in_target_class = false
-      in_enums_block = false
+      in_target_class = T.let(false, T::Boolean)
+      in_enums_block = T.let(false, T::Boolean)
       current_comment = T.let(nil, T.nilable(String))
       lines.each do |line|
         stripped = line.strip
         # Check if we're entering the target enum class
         if stripped.match(/^class\s+#{Regexp.escape(class_name)}\s*<\s*T::Enum/)
           in_target_class = true
           next
         end
         next unless in_target_class
         # Check if we're in the enums block
         if stripped == 'enums do'
           in_enums_block = true
           next
         end
         # Exit enums block
         if in_enums_block && stripped == 'end'
           in_enums_block = false
           next
         end
         # Exit class
-        if stripped == 'end' && !in_enums_block
-          break
-        end
+        break if stripped == 'end' && !in_enums_block
         next unless in_enums_block
         # Extract comment
         if stripped.start_with?('#')
-          comment_text = stripped[1..-1].strip
+          comment_text = T.must(stripped[1..-1]).strip
           current_comment = current_comment ? "#{current_comment} #{comment_text}" : comment_text
         elsif stripped.match(/^(\w+)\s*=\s*new/) && current_comment
-          enum_name = stripped.match(/^(\w+)\s*=\s*new/)[1]
-          comments[enum_name] = current_comment
+          enum_name = T.must(stripped.match(/^(\w+)\s*=\s*new/))[1]
+          comments[T.must(enum_name)] = current_comment
           current_comment = nil
         elsif !stripped.empty? && !stripped.start_with?('#')
           # Reset comment if we hit non-comment, non-enum line
@@ -162,4 +159,4 @@ module SorbetBaml
       end
     end
   end
-end
+end