structify 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,10 +3,11 @@
3
3
  require "active_support/concern"
4
4
  require "active_support/core_ext/class/attribute"
5
5
  require "attr_json"
6
+ require_relative "schema_serializer"
6
7
 
7
8
  module Structify
8
9
  # The Model module provides a DSL for defining LLM extraction schemas in your Rails models.
9
- # It allows you to define fields, versioning, and assistant prompts for LLM-based data extraction.
10
+ # It allows you to define fields, versioning, and validation for LLM-based data extraction.
10
11
  #
11
12
  # @example
12
13
  # class Article < ApplicationRecord
@@ -16,8 +17,6 @@ module Structify
16
17
  # title "Article Extraction"
17
18
  # description "Extract article metadata"
18
19
  # version 1
19
- # assistant_prompt "Extract the following fields from the article"
20
- # llm_model "gpt-4"
21
20
  #
22
21
  # field :title, :string, required: true
23
22
  # field :summary, :text, description: "A brief summary of the article"
@@ -31,8 +30,40 @@ module Structify
31
30
  include AttrJson::Record
32
31
  class_attribute :schema_builder, instance_writer: false, default: nil
33
32
 
34
- # Store all extracted data in the extracted_data JSON column
35
- attr_json_config(default_container_attribute: :extracted_data)
33
+ # Use the configured default container attribute
34
+ attr_json_config(default_container_attribute: Structify.configuration.default_container_attribute)
35
+ end
36
+
37
+ # Instance methods
38
+ def version_compatible_with?(required_version)
39
+ container_attribute = self.class.attr_json_config.default_container_attribute
40
+ record_data = self.send(container_attribute) || {}
41
+ record_version = record_data["version"] || 1
42
+ record_version >= required_version
43
+ end
44
+
45
+ # Get the stored version of this record
46
+ def stored_version
47
+ container_attribute = self.class.attr_json_config.default_container_attribute
48
+ record_data = self.send(container_attribute) || {}
49
+ record_data["version"] || 1
50
+ end
51
+
52
+ # Check if a version is within a given range/array of versions
53
+ # This is used in field accessors to check version compatibility
54
+ #
55
+ # @param version [Integer] The version to check
56
+ # @param range [Range, Array, Integer] The range, array, or single version to check against
57
+ # @return [Boolean] Whether the version is within the range
58
+ def version_in_range?(version, range)
59
+ case range
60
+ when Range
61
+ range.cover?(version)
62
+ when Array
63
+ range.include?(version)
64
+ else
65
+ version == range
66
+ end
36
67
  end
37
68
 
38
69
  # Class methods added to the including class
@@ -60,19 +91,6 @@ module Structify
60
91
  schema_builder&.version_number
61
92
  end
62
93
 
63
- # Get the assistant prompt
64
- #
65
- # @return [String] The assistant prompt
66
- def extraction_assistant_prompt
67
- schema_builder&.assistant_prompt_str
68
- end
69
-
70
- # Get the LLM model name
71
- #
72
- # @return [String] The model name
73
- def extraction_llm_model
74
- schema_builder&.model_name
75
- end
76
94
  end
77
95
  end
78
96
 
@@ -82,11 +100,9 @@ module Structify
82
100
  # @return [Array<Hash>] The field definitions
83
101
  # @return [String] The schema title
84
102
  # @return [String] The schema description
85
- # @return [String] The assistant prompt
86
- # @return [String] The LLM model name
87
103
  # @return [Integer] The schema version
88
- attr_reader :model, :fields, :title_str, :description_str,
89
- :assistant_prompt_str, :model_name, :version_number
104
+ # @return [Boolean] Whether thinking mode is enabled
105
+ attr_reader :model, :fields, :title_str, :description_str, :version_number, :thinking_enabled
90
106
 
91
107
  # Initialize a new SchemaBuilder
92
108
  #
@@ -94,9 +110,17 @@ module Structify
94
110
  def initialize(model)
95
111
  @model = model
96
112
  @fields = []
97
- @assistant_prompt_str = nil
98
- @model_name = nil
99
113
  @version_number = 1
114
+ @thinking_enabled = false
115
+ end
116
+
117
+ # Enable or disable thinking mode
118
+ # When enabled, the LLM will be asked to provide chain of thought reasoning
119
+ #
120
+ # @param enabled [Boolean] Whether to enable thinking mode
121
+ # @return [void]
122
+ def thinking(enabled)
123
+ @thinking_enabled = enabled
100
124
  end
101
125
 
102
126
  # Set the schema title
@@ -121,24 +145,15 @@ module Structify
121
145
  # @return [void]
122
146
  def version(num)
123
147
  @version_number = num
124
- model.attribute :version, :integer, default: num
148
+
149
+ # Define version as an attr_json field so it's stored in extracted_data
150
+ model.attr_json :version, :integer, default: num
151
+
152
+ # Store mapping of fields to their introduction version
153
+ @fields_by_version ||= {}
154
+ @fields_by_version[num] ||= []
125
155
  end
126
156
 
127
- # Set the assistant prompt
128
- #
129
- # @param prompt [String] The prompt text
130
- # @return [void]
131
- def assistant_prompt(prompt)
132
- @assistant_prompt_str = prompt.strip
133
- end
134
-
135
- # Set the LLM model name
136
- #
137
- # @param name [String] The model name
138
- # @return [void]
139
- def llm_model(name)
140
- @model_name = name
141
- end
142
157
 
143
158
  # Define a field in the schema
144
159
  #
@@ -147,40 +162,269 @@ module Structify
147
162
  # @param required [Boolean] Whether the field is required
148
163
  # @param description [String] The field description
149
164
  # @param enum [Array] Possible values for the field
165
+ # @param items [Hash] For array type, defines the schema for array items
166
+ # @param properties [Hash] For object type, defines the properties of the object
167
+ # @param min_items [Integer] For array type, minimum number of items
168
+ # @param max_items [Integer] For array type, maximum number of items
169
+ # @param unique_items [Boolean] For array type, whether items must be unique
170
+ # @param versions [Range, Array, Integer] The versions this field is available in (default: current version onwards)
150
171
  # @return [void]
151
- def field(name, type, required: false, description: nil, enum: nil)
152
- fields << {
172
+ def field(name, type, required: false, description: nil, enum: nil,
173
+ items: nil, properties: nil, min_items: nil, max_items: nil,
174
+ unique_items: nil, versions: nil)
175
+
176
+ # Handle version information
177
+ version_range = if versions
178
+ # Use the versions parameter if provided
179
+ versions
180
+ else
181
+ # Default: field is available in all versions
182
+ 1..999
183
+ end
184
+
185
+ # Check if the field is applicable for the current schema version
186
+ field_available = version_in_range?(@version_number, version_range)
187
+
188
+ # Skip defining the field in the schema if it's not applicable to the current version
189
+ unless field_available
190
+ # Still define an accessor that raises an appropriate error
191
+ define_version_range_accessor(name, version_range)
192
+ return
193
+ end
194
+
195
+ # Calculate a simple introduced_in for backward compatibility
196
+ effective_introduced_in = case version_range
197
+ when Range
198
+ version_range.begin
199
+ when Array
200
+ version_range.min
201
+ else
202
+ version_range
203
+ end
204
+
205
+ field_definition = {
153
206
  name: name,
154
207
  type: type,
155
208
  required: required,
156
209
  description: description,
157
- enum: enum
210
+ version_range: version_range,
211
+ introduced_in: effective_introduced_in
158
212
  }
213
+
214
+ # Add enum if provided
215
+ field_definition[:enum] = enum if enum
216
+
217
+ # Array specific properties
218
+ if type == :array
219
+ field_definition[:items] = items if items
220
+ field_definition[:min_items] = min_items if min_items
221
+ field_definition[:max_items] = max_items if max_items
222
+ field_definition[:unique_items] = unique_items if unique_items
223
+ end
224
+
225
+ # Object specific properties
226
+ if type == :object
227
+ field_definition[:properties] = properties if properties
228
+ end
229
+
230
+ fields << field_definition
231
+
232
+ # Track field by its version range
233
+ @fields_by_version ||= {}
234
+ @fields_by_version[effective_introduced_in] ||= []
235
+ @fields_by_version[effective_introduced_in] << name
236
+
237
+ # Map JSON Schema types to Ruby/AttrJson types
238
+ attr_type = case type
239
+ when :integer, :number
240
+ :integer
241
+ when :array
242
+ :json
243
+ when :object
244
+ :json
245
+ when :boolean
246
+ :boolean
247
+ else
248
+ type # string, text stay the same
249
+ end
159
250
 
251
+ # Define custom accessor that checks version compatibility
252
+ define_version_range_accessors(name, attr_type, version_range)
253
+ end
254
+
255
+ # Check if a version is within a given range/array of versions
256
+ #
257
+ # @param version [Integer] The version to check
258
+ # @param range [Range, Array, Integer] The range, array, or single version to check against
259
+ # @return [Boolean] Whether the version is within the range
260
+ def version_in_range?(version, range)
261
+ case range
262
+ when Range
263
+ # Handle endless ranges (Ruby 2.6+): 2.. means 2 and above
264
+ if range.end.nil?
265
+ version >= range.begin
266
+ else
267
+ range.cover?(version)
268
+ end
269
+ when Array
270
+ range.include?(version)
271
+ else
272
+ # A single integer means "this version and onwards"
273
+ version >= range
274
+ end
275
+ end
276
+
277
+ # Define accessor methods that check version compatibility using the new version ranges
278
+ #
279
+ # @param name [Symbol] The field name
280
+ # @param type [Symbol] The field type for attr_json
281
+ # @param version_range [Range, Array, Integer] The versions this field is available in
282
+ # @return [void]
283
+ def define_version_range_accessors(name, type, version_range)
284
+ # Define the attr_json normally first
160
285
  model.attr_json name, type
286
+
287
+ # Extract current version for error messages
288
+ schema_version = @version_number
289
+
290
+ # Then override the reader method to check versions
291
+ model.class_eval <<-RUBY, __FILE__, __LINE__ + 1
292
+ # Store original method
293
+ alias_method :_original_#{name}, :#{name}
294
+
295
+ # Override reader to check version compatibility
296
+ def #{name}
297
+ # Get the container attribute and data
298
+ container_attribute = self.class.attr_json_config.default_container_attribute
299
+ record_data = self.send(container_attribute)
300
+
301
+ # Get the version from the record data
302
+ record_version = record_data && record_data["version"] ?
303
+ record_data["version"] : 1
304
+
305
+ # Check if record version is compatible with field's version range
306
+ field_version_range = #{version_range.inspect}
307
+
308
+ # Handle field lifecycle based on version
309
+ unless version_in_range?(record_version, field_version_range)
310
+ # Check if this is a removed field (was valid in earlier versions but not current version)
311
+ if field_version_range.is_a?(Range) && field_version_range.begin <= record_version && field_version_range.end < #{schema_version}
312
+ raise Structify::RemovedFieldError.new(
313
+ "#{name}",
314
+ field_version_range.end
315
+ )
316
+ # Check if this is a new field (only valid in later versions)
317
+ elsif (field_version_range.is_a?(Range) && field_version_range.begin > record_version) ||
318
+ (field_version_range.is_a?(Integer) && field_version_range > record_version)
319
+ raise Structify::VersionRangeError.new(
320
+ "#{name}",
321
+ record_version,
322
+ field_version_range
323
+ )
324
+ # Otherwise it's just not in the valid range
325
+ else
326
+ raise Structify::VersionRangeError.new(
327
+ "#{name}",
328
+ record_version,
329
+ field_version_range
330
+ )
331
+ end
332
+ end
333
+
334
+ # Check for deprecated fields and show warning
335
+ if field_version_range.is_a?(Range) &&
336
+ field_version_range.begin < #{schema_version} &&
337
+ field_version_range.end < 999 &&
338
+ field_version_range.cover?(record_version)
339
+ ActiveSupport::Deprecation.warn(
340
+ "Field '#{name}' is deprecated as of version #{schema_version} and will be removed in version \#{field_version_range.end}."
341
+ )
342
+ end
343
+
344
+ # Call original method
345
+ _original_#{name}
346
+ end
347
+ RUBY
348
+ end
349
+
350
+ # Define accessor for fields that are not in the current schema version
351
+ # These will raise an appropriate error when accessed
352
+ #
353
+ # @param name [Symbol] The field name
354
+ # @param version_range [Range, Array, Integer] The versions this field is available in
355
+ # @return [void]
356
+ def define_version_range_accessor(name, version_range)
357
+ # Capture schema version to use in the eval block
358
+ schema_version = @version_number
359
+
360
+ # Handle different version range types
361
+ version_range_type = case version_range
362
+ when Range
363
+ "range"
364
+ when Array
365
+ "array"
366
+ else
367
+ "integer"
368
+ end
369
+
370
+ # Extract begin/end values for ranges
371
+ range_begin = case version_range
372
+ when Range
373
+ version_range.begin
374
+ when Array
375
+ version_range.min
376
+ else
377
+ version_range
378
+ end
379
+
380
+ range_end = case version_range
381
+ when Range
382
+ version_range.end
383
+ when Array
384
+ version_range.max
385
+ else
386
+ version_range
387
+ end
388
+
389
+ model.class_eval <<-RUBY, __FILE__, __LINE__ + 1
390
+ # Define an accessor that raises an error when accessed
391
+ def #{name}
392
+ # Based on the version_range type, create appropriate errors
393
+ case "#{version_range_type}"
394
+ when "range"
395
+ if #{range_begin} <= #{schema_version} && #{range_end} < #{schema_version}
396
+ # Removed field
397
+ raise Structify::RemovedFieldError.new("#{name}", #{range_end})
398
+ elsif #{range_begin} > #{schema_version}
399
+ # Field from future version
400
+ raise Structify::VersionRangeError.new("#{name}", #{schema_version}, #{version_range.inspect})
401
+ else
402
+ # Not in range for other reasons
403
+ raise Structify::VersionRangeError.new("#{name}", #{schema_version}, #{version_range.inspect})
404
+ end
405
+ when "array"
406
+ # For arrays, we can only check if the current version is in the array
407
+ raise Structify::VersionRangeError.new("#{name}", #{schema_version}, #{version_range.inspect})
408
+ else
409
+ # For integers, just report version mismatch
410
+ raise Structify::VersionRangeError.new("#{name}", #{schema_version}, #{version_range.inspect})
411
+ end
412
+ end
413
+
414
+ # Define a writer that raises an error too
415
+ def #{name}=(value)
416
+ # Use the same error logic as the reader
417
+ self.#{name}
418
+ end
419
+ RUBY
161
420
  end
162
421
 
163
422
  # Generate the JSON schema representation
164
423
  #
165
424
  # @return [Hash] The JSON schema
166
425
  def to_json_schema
167
- required_fields = fields.select { |f| f[:required] }.map { |f| f[:name].to_s }
168
- properties_hash = fields.each_with_object({}) do |f, hash|
169
- prop = { type: f[:type].to_s }
170
- prop[:description] = f[:description] if f[:description]
171
- prop[:enum] = f[:enum] if f[:enum]
172
- hash[f[:name].to_s] = prop
173
- end
174
-
175
- {
176
- name: title_str,
177
- description: description_str,
178
- parameters: {
179
- type: "object",
180
- required: required_fields,
181
- properties: properties_hash
182
- }
183
- }
426
+ serializer = SchemaSerializer.new(self)
427
+ serializer.to_json_schema
184
428
  end
185
429
  end
186
430
  end
@@ -0,0 +1,165 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Structify
4
+ # Handles serialization of schema definitions to different formats
5
+ class SchemaSerializer
6
+ # @return [Structify::SchemaBuilder] The schema builder to serialize
7
+ attr_reader :schema_builder
8
+
9
+ # Initialize a new SchemaSerializer
10
+ #
11
+ # @param schema_builder [Structify::SchemaBuilder] The schema builder to serialize
12
+ def initialize(schema_builder)
13
+ @schema_builder = schema_builder
14
+ end
15
+
16
+ # Generate the JSON schema representation
17
+ #
18
+ # @return [Hash] The JSON schema
19
+ def to_json_schema
20
+ # Get current schema version
21
+ current_version = schema_builder.version_number
22
+
23
+ # Get fields that are applicable to the current schema version
24
+ fields = schema_builder.fields.select do |f|
25
+ # Check if the field has a version_range
26
+ if f[:version_range]
27
+ version_in_range?(current_version, f[:version_range])
28
+ # Legacy check for removed_in
29
+ elsif f[:removed_in]
30
+ f[:removed_in] > current_version
31
+ else
32
+ true
33
+ end
34
+ end
35
+
36
+ # Get required fields (excluding fields not in the current version)
37
+ required_fields = fields.select { |f| f[:required] }.map { |f| f[:name].to_s }
38
+
39
+ # Start with chain_of_thought if thinking mode is enabled
40
+ properties_hash = {}
41
+ if schema_builder.thinking_enabled
42
+ properties_hash["chain_of_thought"] = {
43
+ type: "string",
44
+ description: "Explain your thought process step by step before determining the final values."
45
+ }
46
+ end
47
+
48
+ # Add all other fields
49
+ fields.each_with_object(properties_hash) do |f, hash|
50
+ # Start with the basic type
51
+ prop = { type: f[:type].to_s }
52
+
53
+ # Add description if available
54
+ prop[:description] = f[:description] if f[:description]
55
+
56
+ # Add enum if available
57
+ prop[:enum] = f[:enum] if f[:enum]
58
+
59
+ # Handle array specific properties
60
+ if f[:type] == :array
61
+ # Add items schema
62
+ prop[:items] = f[:items] if f[:items]
63
+
64
+ # Add array constraints
65
+ prop[:minItems] = f[:min_items] if f[:min_items]
66
+ prop[:maxItems] = f[:max_items] if f[:max_items]
67
+ prop[:uniqueItems] = f[:unique_items] if f[:unique_items]
68
+ end
69
+
70
+ # Handle object specific properties
71
+ if f[:type] == :object && f[:properties]
72
+ prop[:properties] = {}
73
+ required_props = []
74
+
75
+ # Process each property
76
+ f[:properties].each do |prop_name, prop_def|
77
+ prop[:properties][prop_name] = prop_def.dup
78
+
79
+ # If a property is marked as required, add it to required list and remove from property definition
80
+ if prop_def[:required]
81
+ required_props << prop_name
82
+ prop[:properties][prop_name].delete(:required)
83
+ end
84
+ end
85
+
86
+ # Add required array if we have required properties
87
+ prop[:required] = required_props unless required_props.empty?
88
+ end
89
+
90
+ # Add version info to description only if requested by environment variable
91
+ # This allows for backward compatibility with existing tests
92
+ if ENV["STRUCTIFY_SHOW_VERSION_INFO"] && f[:version_range] && prop[:description]
93
+ version_info = format_version_range(f[:version_range])
94
+ prop[:description] = "#{prop[:description]} (Available in versions: #{version_info})"
95
+ elsif ENV["STRUCTIFY_SHOW_VERSION_INFO"] && f[:version_range]
96
+ prop[:description] = "Available in versions: #{format_version_range(f[:version_range])}"
97
+ end
98
+
99
+ # Legacy: Add a deprecation notice to description
100
+ if f[:deprecated_in] && f[:deprecated_in] <= current_version
101
+ deprecation_note = "Deprecated in v#{f[:deprecated_in]}. "
102
+ prop[:description] = if prop[:description]
103
+ "#{deprecation_note}#{prop[:description]}"
104
+ else
105
+ deprecation_note
106
+ end
107
+ end
108
+
109
+ hash[f[:name].to_s] = prop
110
+ end
111
+
112
+ {
113
+ name: schema_builder.title_str,
114
+ description: schema_builder.description_str,
115
+ parameters: {
116
+ type: "object",
117
+ required: required_fields,
118
+ properties: properties_hash
119
+ }
120
+ }
121
+ end
122
+
123
+ private
124
+
125
+ # Check if a version is within a given range/array of versions
126
+ #
127
+ # @param version [Integer] The version to check
128
+ # @param range [Range, Array, Integer] The range, array, or single version to check against
129
+ # @return [Boolean] Whether the version is within the range
130
+ def version_in_range?(version, range)
131
+ case range
132
+ when Range
133
+ # Handle endless ranges (Ruby 2.6+): 2.. means 2 and above
134
+ if range.end.nil?
135
+ version >= range.begin
136
+ else
137
+ range.cover?(version)
138
+ end
139
+ when Array
140
+ range.include?(version)
141
+ else
142
+ # A single integer means "this version and onwards"
143
+ version >= range
144
+ end
145
+ end
146
+
147
+ # Format a version range for display in error messages
148
+ #
149
+ # @param versions [Range, Array, Integer] The version range to format
150
+ # @return [String] A human-readable version range
151
+ def format_version_range(versions)
152
+ if versions.is_a?(Range)
153
+ if versions.end.nil?
154
+ "#{versions.begin} and above"
155
+ else
156
+ "#{versions.begin} to #{versions.end}#{versions.exclude_end? ? ' (exclusive)' : ''}"
157
+ end
158
+ elsif versions.is_a?(Array)
159
+ versions.join(", ")
160
+ else
161
+ "#{versions} and above" # Single integer means this version and onwards
162
+ end
163
+ end
164
+ end
165
+ end
@@ -1,3 +1,3 @@
1
1
  module Structify
2
- VERSION = "0.1.0"
2
+ VERSION = "0.3.0"
3
3
  end