prescient 0.0.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,330 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Example: Vector similarity search with Prescient gem and PostgreSQL pgvector
5
+ # This example demonstrates how to store embeddings and perform similarity search
6
+
7
+ require_relative '../lib/prescient'
8
+ require 'pg'
9
+ require 'json'
10
+
11
+ puts "=== Vector Similarity Search Example ==="
12
+ puts "This example shows how to use Prescient with PostgreSQL pgvector for semantic search."
13
+
14
+ # Database connection configuration
15
# Connection settings handed to PG.connect. Each entry pairs an environment
# variable with the default used when that variable is not set.
DB_CONFIG = {
  host: %w[DB_HOST localhost],
  port: %w[DB_PORT 5432],
  dbname: %w[DB_NAME prescient_development],
  user: %w[DB_USER prescient],
  password: %w[DB_PASSWORD prescient_password]
}.transform_values { |(variable, fallback)| ENV.fetch(variable, fallback) }.freeze
22
+
23
+ class VectorSearchExample
24
# Opens the PostgreSQL connection described by DB_CONFIG and builds the
# Prescient client for the :ollama provider.
def initialize
  # Connect first so a database failure surfaces before the client is built.
  @db = PG.connect(DB_CONFIG)
  @client = Prescient.client(:ollama)
end
28
+
29
# Runs the demo end to end: checks the required services, then executes each
# stage in order, printing a banner before every stage.
#
# @return [nil]
def run_example
  puts "\n--- Setting up vector search example ---"

  # Nothing below can work without PostgreSQL, pgvector and Ollama.
  unless check_services_available
    puts "āŒ Required services not available. Please start with: docker-compose up -d"
    return
  end

  # Stage method => banner printed before it runs (insertion order is run order).
  stages = {
    generate_document_embeddings: "\nšŸ“Š Generating embeddings for sample documents...",
    search_examples: "\nšŸ” Performing similarity searches...",
    advanced_search_examples: "\nšŸŽÆ Advanced search with metadata filtering...",
    compare_distance_functions: "\nšŸ“ Comparing different distance functions..."
  }

  stages.each do |stage, banner|
    puts banner
    send(stage)
  end

  puts "\nšŸŽ‰ Vector search example completed!"
end
56
+
57
+ private
58
+
59
# Verifies, in order, that PostgreSQL answers a trivial query, that the
# pgvector extension is listed in pg_extension, and that the Ollama client
# reports itself available. Stops at the first failing check.
#
# @return [Boolean] true only when all three checks pass
def check_services_available
  postgres_reachable? && pgvector_installed? && ollama_reachable?
end

# Runs "SELECT 1" and reports whether the database responded.
def postgres_reachable?
  @db.exec("SELECT 1")
  puts "āœ… PostgreSQL connected"
  true
rescue PG::Error => e
  puts "āŒ PostgreSQL connection failed: #{e.message}"
  false
end

# Looks for a row named 'vector' in pg_extension.
def pgvector_installed?
  extension_rows = @db.exec("SELECT * FROM pg_extension WHERE extname = 'vector'")
  installed = extension_rows.ntuples > 0
  puts(installed ? "āœ… pgvector extension available" : "āŒ pgvector extension not found")
  installed
rescue PG::Error => e
  puts "āŒ pgvector check failed: #{e.message}"
  false
end

# Asks the Prescient client whether its provider is available.
def ollama_reachable?
  reachable = @client.available? ? true : false
  puts(reachable ? "āœ… Ollama connected" : "āŒ Ollama not available")
  reachable
end
93
+
94
# Generates and stores embeddings for up to 10 documents that do not yet have
# an 'ollama' / 'nomic-embed-text' row in document_embeddings.
#
# A failure on one document (embedding generation or the INSERT) is reported
# and the loop moves on to the next document.
#
# @return [void]
def generate_document_embeddings
  pending_sql = <<~SQL
    SELECT d.id, d.title, d.content
    FROM documents d
    LEFT JOIN document_embeddings de ON d.id = de.document_id
      AND de.embedding_provider = 'ollama'
      AND de.embedding_model = 'nomic-embed-text'
    WHERE de.id IS NULL
    LIMIT 10
  SQL

  pending = @db.exec(pending_sql)

  if pending.ntuples.zero?
    puts " All documents already have embeddings"
    return
  end

  pending.each do |row|
    puts " Generating embedding for: #{row['title']}"

    embedding = @client.generate_embedding(row['content'])

    # Record the dimension count the model actually returned rather than a
    # hard-coded 768, so the stored metadata always matches the vector.
    insert_embedding(row['id'], embedding, row['content'], 'ollama', 'nomic-embed-text', embedding.length)

    puts " āœ… Stored embedding (#{embedding.length} dimensions)"
  rescue Prescient::Error => e
    puts " āŒ Failed to generate embedding: #{e.message}"
  rescue PG::Error => e
    # Keep going: one rejected INSERT should not abort the remaining documents.
    puts " āŒ Failed to store embedding: #{e.message}"
  end
end
134
+
135
# Inserts one row into document_embeddings.
#
# @param document_id [Object] id of the owning documents row
# @param embedding [Array<Numeric>] embedding values
# @param text [String] the text the embedding was generated from
# @param provider [String] embedding provider name
# @param model [String] embedding model name
# @param dimensions [Integer] number of dimensions recorded for the embedding
# @return [Object] whatever @db.exec_params returns
def insert_embedding(document_id, embedding, text, provider, model, dimensions)
  # The vector is sent in bracketed text form: "[v1,v2,...]".
  vector_literal = format('[%s]', embedding.join(','))
  bind_values = [document_id, provider, model, dimensions, vector_literal, text]

  @db.exec_params(<<~SQL, bind_values)
    INSERT INTO document_embeddings
      (document_id, embedding_provider, embedding_model, embedding_dimensions, embedding, embedding_text)
    VALUES ($1, $2, $3, $4, $5, $6)
  SQL
end
147
+
148
# Runs a top-3 similarity search for each of four fixed sample questions,
# printing a heading before every search.
def search_examples
  [
    "How to learn programming?",
    "What is machine learning?",
    "Database optimization techniques",
    "API security best practices"
  ].each do |question|
    puts "\nšŸ” Searching for: '#{question}'"
    perform_similarity_search(question, limit: 3)
  end
end
161
+
162
# Embeds +query_text+, ranks stored 'ollama' / 'nomic-embed-text' embeddings
# against it and prints the closest documents.
#
# Results are ordered by the chosen distance operator. The cosine similarity
# percentage is always printed; for non-cosine functions the raw distance
# that drove the ordering is printed as well, so the comparison between
# distance functions is visible in the output.
#
# @param query_text [String] text to search for
# @param limit [Integer] maximum number of rows to print
# @param distance_function [String] 'cosine', 'l2' or 'inner_product';
#   any other value falls back to cosine
# @return [void]
def perform_similarity_search(query_text, limit: 5, distance_function: 'cosine')
  query_embedding = @client.generate_embedding(query_text)
  query_vector = "[#{query_embedding.join(',')}]"

  # The operator is interpolated into the SQL text, so it is only ever taken
  # from this fixed table, never from the caller's string.
  distance_op = { 'cosine' => '<=>', 'l2' => '<->', 'inner_product' => '<#>' }.fetch(distance_function, '<=>')

  search_query = <<~SQL
    SELECT
      d.title,
      d.content,
      d.metadata,
      de.embedding #{distance_op} $1::vector AS distance,
      1 - (de.embedding <=> $1::vector) AS cosine_similarity
    FROM documents d
    JOIN document_embeddings de ON d.id = de.document_id
    WHERE de.embedding_provider = 'ollama'
      AND de.embedding_model = 'nomic-embed-text'
    ORDER BY de.embedding #{distance_op} $1::vector
    LIMIT $2
  SQL

  result = @db.exec_params(search_query, [query_vector, limit])

  if result.ntuples.zero?
    puts " No results found"
    return
  end

  result.each_with_index do |row, index|
    similarity = (row['cosine_similarity'].to_f * 100).round(1)
    puts " #{index + 1}. #{row['title']} (#{similarity}% similar)"
    puts " #{distance_function} distance: #{row['distance'].to_f.round(4)}" unless distance_op == '<=>'
    # to_s keeps a NULL content column from raising NoMethodError.
    puts " #{row['content'].to_s[0..100]}..."

    tags = metadata_tags(row['metadata'])
    puts " Tags: #{tags}" if tags
    puts
  end
rescue Prescient::Error => e
  puts " āŒ Search failed: #{e.message}"
rescue PG::Error => e
  puts " āŒ Database error: #{e.message}"
end

# Extracts a comma-separated tag list from a JSON metadata string.
# Returns nil when the metadata is missing, empty, not valid JSON, or has no
# array under "tags".
def metadata_tags(raw_metadata)
  return nil if raw_metadata.nil? || raw_metadata.empty?

  parsed = JSON.parse(raw_metadata)
  tags = parsed.is_a?(Hash) ? parsed['tags'] : nil
  tags.is_a?(Array) ? tags.join(', ') : nil
rescue JSON::ParserError
  nil
end
219
+
220
# Runs two filtered searches: beginner-level programming content, then AI/ML
# content selected by tags.
def advanced_search_examples
  scenarios = [
    ["\nšŸŽÆ Search for programming content with beginner difficulty:",
     "programming basics", { tags: ["programming"], difficulty: "beginner" }],
    ["\nšŸŽÆ Search for AI/ML content:",
     "artificial intelligence", { tags: ["ai", "machine-learning"] }]
  ]

  scenarios.each do |banner, query_text, filters|
    puts banner
    advanced_search(query_text, filters)
  end
end
228
+
229
# Cosine-similarity search restricted by optional metadata filters; prints the
# top 3 matches.
#
# @param query_text [String] text to search for
# @param filters [Hash] supported keys:
#   :tags [Array<String>] match documents whose metadata "tags" contain any of these,
#   :difficulty [String] exact match on metadata "difficulty",
#   :source_type [String] exact match on documents.source_type.
#   Other keys are ignored.
# @return [void]
def advanced_search(query_text, filters = {})
  query_embedding = @client.generate_embedding(query_text)
  params = ["[#{query_embedding.join(',')}]"]
  where_conditions = ["de.embedding_provider = 'ollama'", "de.embedding_model = 'nomic-embed-text'"]

  filters.each do |key, value|
    # $1 is the query vector; each filter takes the next free placeholder.
    placeholder = "$#{params.size + 1}"

    case key
    when :tags
      where_conditions << "d.metadata->'tags' ?| #{placeholder}::text[]"
      # A Ruby Array bound as-is is stringified as ["a", "b"], which is not a
      # PostgreSQL array literal; send the {"a","b"} text form instead.
      params << pg_text_array_literal(value)
    when :difficulty
      where_conditions << "d.metadata->>'difficulty' = #{placeholder}"
      params << value
    when :source_type
      where_conditions << "d.source_type = #{placeholder}"
      params << value
    end
  end

  search_query = <<~SQL
    SELECT
      d.title,
      d.content,
      d.metadata,
      de.embedding <=> $1::vector AS cosine_distance,
      1 - (de.embedding <=> $1::vector) AS cosine_similarity
    FROM documents d
    JOIN document_embeddings de ON d.id = de.document_id
    WHERE #{where_conditions.join(' AND ')}
    ORDER BY de.embedding <=> $1::vector
    LIMIT 3
  SQL

  result = @db.exec_params(search_query, params)

  if result.ntuples.zero?
    puts " No results found with the specified filters"
    return
  end

  result.each_with_index do |row, index|
    similarity = (row['cosine_similarity'].to_f * 100).round(1)
    puts " #{index + 1}. #{row['title']} (#{similarity}% similar)"

    # Metadata can be NULL when no metadata filter was applied.
    raw_metadata = row['metadata']
    metadata = raw_metadata.nil? || raw_metadata.empty? ? {} : JSON.parse(raw_metadata)
    puts " Difficulty: #{metadata['difficulty']}"
    puts " Tags: #{metadata['tags']&.join(', ')}"
    puts " #{row['content'][0..80]}..."
    puts
  end
rescue Prescient::Error => e
  puts " āŒ Search failed: #{e.message}"
rescue PG::Error => e
  puts " āŒ Database error: #{e.message}"
end

# Renders values as a PostgreSQL text[] literal, e.g. {"ai","machine-learning"}.
# Every element is double-quoted, with embedded quotes and backslashes escaped.
def pg_text_array_literal(values)
  quoted = Array(values).map do |value|
    escaped = value.to_s.gsub(/["\\]/) { |char| "\\#{char}" }
    %("#{escaped}")
  end
  "{#{quoted.join(',')}}"
end
297
+
298
# Runs the same query once per supported distance function (cosine, l2,
# inner_product), printing a heading before each top-2 result list.
def compare_distance_functions
  query_text = "programming languages and development"
  puts "\nšŸ“ Comparing distance functions for: '#{query_text}'"

  ['cosine', 'l2', 'inner_product'].each do |function_name|
    heading = "\n #{function_name.upcase} Distance:"
    puts heading
    perform_similarity_search(query_text, limit: 2, distance_function: function_name)
  end
end
308
+
309
# Closes the database connection if one was opened.
#
# @return [void]
def cleanup
  @db&.close
end
# The script's ensure block calls `example&.cleanup` with an explicit receiver.
# This definition sits below the class's `private` marker, so it has to be
# re-exported or that call raises NoMethodError.
public :cleanup
312
+ end
313
+
314
+ # Run the example
315
# Build the example, run it, and always attempt cleanup; any StandardError is
# reported with the first five backtrace frames instead of crashing the script.
begin
  example = VectorSearchExample.new
  example.run_example
rescue StandardError => error
  puts "āŒ Example failed: #{error.message}", error.backtrace.take(5).join("\n")
ensure
  example&.cleanup
end
324
+
325
+ puts "\nšŸ’” Next steps:"
326
+ puts " - Try different embedding models (OpenAI, HuggingFace)"
327
+ puts " - Implement hybrid search (vector + keyword)"
328
+ puts " - Add document chunking for large texts"
329
+ puts " - Experiment with different similarity thresholds"
330
+ puts " - Add result re-ranking and filtering"
@@ -0,0 +1,374 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Base class for all AI provider implementations
4
+ #
5
+ # This abstract base class defines the common interface that all AI providers
6
+ # must implement. It provides shared functionality for text processing, context
7
+ # formatting, prompt building, and error handling.
8
+ #
9
+ # @abstract Subclass and implement {#generate_embedding}, {#generate_response},
10
+ # and {#health_check}; optionally override {#validate_configuration!}
11
+ #
12
+ # @example Creating a custom provider
13
+ # class MyProvider < Prescient::Base
14
+ # def generate_embedding(text, **options)
15
+ # # Implementation here
16
+ # end
17
+ #
18
+ # def generate_response(prompt, context_items = [], **options)
19
+ # # Implementation here
20
+ # end
21
+ #
22
+ # def health_check
23
+ # # Implementation here
24
+ # end
25
+ # end
26
+ #
27
+ # @author Claude Code
28
+ # @since 1.0.0
29
+ class Prescient::Base
30
+ # @return [Hash] Configuration options for this provider instance
31
+ attr_reader :options
32
+
33
+ # Initialize the provider with configuration options
34
+ #
35
+ # @param options [Hash] Provider-specific configuration options
36
+ # @option options [String] :api_key API key for authenticated providers
37
+ # @option options [String] :url Base URL for self-hosted providers
38
+ # @option options [Integer] :timeout Request timeout in seconds
39
+ # @option options [Hash] :prompt_templates Custom prompt templates
40
+ # @option options [Hash] :context_configs Context formatting configurations
41
def initialize(**options)
  @options = options
  # Runs after @options is assigned so the validation hook can read it.
  validate_configuration!
end
45
+
46
+ # Generate embeddings for the given text
47
+ #
48
+ # This method must be implemented by subclasses to provide embedding
49
+ # generation functionality.
50
+ #
51
+ # @param text [String] The text to generate embeddings for
52
+ # @param options [Hash] Provider-specific options
53
+ # @return [Array<Float>] Array of embedding values
54
+ # @raise [NotImplementedError] If not implemented by subclass
55
+ # @abstract
56
def generate_embedding(text, **options)
  # Abstract hook: the message names the concrete class that forgot to override it.
  message = "#{self.class} must implement #generate_embedding"
  raise NotImplementedError, message
end
59
+
60
+ # Generate text response for the given prompt
61
+ #
62
+ # This method must be implemented by subclasses to provide text generation
63
+ # functionality with optional context items.
64
+ #
65
+ # @param prompt [String] The prompt to generate a response for
66
+ # @param context_items [Array<Hash, String>] Optional context items to include
67
+ # @param options [Hash] Provider-specific generation options
68
+ # @option options [Float] :temperature Sampling temperature (0.0-2.0)
69
+ # @option options [Integer] :max_tokens Maximum tokens to generate
70
+ # @option options [Float] :top_p Nucleus sampling parameter
71
+ # @return [Hash] Response hash with :response, :model, :provider keys
72
+ # @raise [NotImplementedError] If not implemented by subclass
73
+ # @abstract
74
def generate_response(prompt, context_items = [], **options)
  # Abstract hook: the message names the concrete class that forgot to override it.
  message = "#{self.class} must implement #generate_response"
  raise NotImplementedError, message
end
77
+
78
+ # Check the health and availability of the provider
79
+ #
80
+ # This method must be implemented by subclasses to provide health check
81
+ # functionality.
82
+ #
83
+ # @return [Hash] Health status with :status, :provider keys and optional details
84
+ # @raise [NotImplementedError] If not implemented by subclass
85
+ # @abstract
86
def health_check
  # Abstract hook: the message names the concrete class that forgot to override it.
  message = "#{self.class} must implement #health_check"
  raise NotImplementedError, message
end
89
+
90
+ # Check if the provider is currently available
91
+ #
92
+ # @return [Boolean] true if provider is healthy and available
93
def available?
  reported_status = health_check[:status]
  reported_status == 'healthy'
rescue StandardError
  # A StandardError raised by the health check counts as "not available".
  false
end
98
+
99
+ protected
100
+
101
+ # Validate provider configuration
102
+ #
103
+ # Override this method in subclasses to validate required configuration
104
+ # options and raise appropriate errors for missing or invalid settings.
105
+ #
106
+ # @return [void]
107
+ # @raise [Prescient::Error] If configuration is invalid
108
def validate_configuration!
  # Deliberately a no-op in the base class; subclasses override this to check
  # their required options.
  nil
end
111
+
112
+ # Handle and standardize errors from provider operations
113
+ #
114
+ # Wraps provider-specific operations and converts common exceptions
115
+ # into standardized Prescient error types while preserving existing
116
+ # Prescient errors.
117
+ #
118
+ # @yield The operation block to execute with error handling
119
+ # @return [Object] The result of the yielded block
120
+ # @raise [Prescient::ConnectionError] For network/timeout errors
121
+ # @raise [Prescient::InvalidResponseError] For JSON parsing errors
122
+ # @raise [Prescient::Error] For other unexpected errors
123
def handle_errors
  yield
rescue Prescient::Error
  # Already a Prescient error: re-raise without wrapping.
  raise
rescue Net::ReadTimeout, Net::OpenTimeout => e
  raise Prescient::ConnectionError, "Request timeout: #{e.message}"
rescue Net::HTTPError => e
  raise Prescient::ConnectionError, "HTTP error: #{e.message}"
rescue SocketError, Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::EHOSTUNREACH, Errno::ETIMEDOUT => e
  # Socket-level failures (service not running, DNS failure, dropped
  # connection) are network errors too, so they map to ConnectionError as the
  # method's contract promises, not to the generic "Unexpected error".
  raise Prescient::ConnectionError, "Connection failed: #{e.message}"
rescue JSON::ParserError => e
  raise Prescient::InvalidResponseError, "Invalid JSON response: #{e.message}"
rescue StandardError => e
  raise Prescient::Error, "Unexpected error: #{e.message}"
end
137
+
138
+ # Normalize embedding dimensions to match expected size
139
+ #
140
+ # Ensures embedding vectors have consistent dimensions by truncating
141
+ # longer vectors or padding shorter ones with zeros.
142
+ #
143
+ # @param embedding [Array<Float>] The embedding vector to normalize
144
+ # @param target_dimensions [Integer] The desired number of dimensions
145
+ # @return [Array<Float>, nil] Normalized embedding or nil if input invalid
146
def normalize_embedding(embedding, target_dimensions)
  return nil unless embedding.is_a?(Array)

  # Positive shortfall => pad with zeros; otherwise keep the leading values.
  shortfall = target_dimensions - embedding.length
  if shortfall.positive?
    embedding + Array.new(shortfall, 0.0)
  else
    embedding.first(target_dimensions)
  end
end
152
+
153
+ # Clean and preprocess text for AI processing
154
+ #
155
+ # Removes excess whitespace, normalizes spacing, and enforces length
156
+ # limits suitable for most AI models.
157
+ #
158
+ # @param text [String, nil] The text to clean
159
+ # @return [String] Cleaned text, empty string if input was nil/empty
160
def clean_text(text)
  # Collapse every whitespace run to one space, then trim the ends.
  single_spaced = text.to_s.gsub(/\s+/, ' ')
  trimmed = single_spaced.strip

  # Keep at most the first 8000 characters.
  trimmed[0, 8000]
end
164
+
165
+ # Get default prompt templates
166
+ #
167
+ # Provides standard templates for system prompts and context handling
168
+ # that can be overridden via provider options.
169
+ #
170
+ # @return [Hash] Hash containing template strings with placeholders
171
+ # @private
172
def default_prompt_templates
  # Templates use named format references (%<name>s) that build_prompt fills in.
  no_context_lines = [
    '%<system_prompt>s',
    '',
    'Question: %<query>s',
    '',
    'Please provide a helpful response based on your knowledge.'
  ]
  with_context_lines = [
    "%<system_prompt>s Use the following context to answer the question. If the context doesn't contain relevant information, say so clearly.",
    '',
    'Context:',
    '%<context>s',
    '',
    'Question: %<query>s',
    '',
    'Please provide a helpful response based on the context above.'
  ]

  {
    system_prompt: 'You are a helpful AI assistant. Answer questions clearly and accurately.',
    no_context_template: no_context_lines.join("\n"),
    with_context_template: with_context_lines.join("\n"),
  }
end
194
+
195
+ # Build formatted prompt from query and context items
196
+ #
197
+ # Creates a properly formatted prompt using configurable templates,
198
+ # incorporating context items when provided.
199
+ #
200
+ # @param query [String] The user's question or prompt
201
+ # @param context_items [Array<Hash, String>] Optional context items
202
+ # @return [String] Formatted prompt ready for AI processing
203
def build_prompt(query, context_items = [])
  # Caller-supplied templates override the defaults key by key.
  custom_templates = @options[:prompt_templates] || {}
  templates = default_prompt_templates.merge(custom_templates)
  values = { system_prompt: templates[:system_prompt], query: query }

  return templates[:no_context_template] % values if context_items.empty?

  # Number the context entries from 1 and separate them with blank lines.
  numbered = context_items.each_with_index.map do |item, position|
    "#{position + 1}. #{format_context_item(item)}"
  end

  templates[:with_context_template] % values.merge(context: numbered.join("\n\n"))
end
224
+
225
+ # Minimal default context configuration - users should define their own contexts
226
def default_context_configs
  # Generic fallback entry: no fixed field list, no format template and no
  # embedding field list.
  generic_config = {
    fields: [],
    format: nil,
    embedding_fields: [],
  }

  { 'default' => generic_config }
end
236
+
237
+ # Extract text for embedding generation based on context configuration
238
def extract_embedding_text(item, context_type = nil)
  return item.to_s unless item.is_a?(Hash)

  config = resolve_context_config(item, context_type)

  # Use the configured embedding fields when there are any; otherwise fall
  # back to the hash's own text-like values.
  pieces = extract_configured_fields(item, config)
  pieces ||= extract_text_values(item)

  pieces.join(' ').strip
end
245
+
246
+ # Extract text values from hash, excluding non-textual fields
247
def extract_text_values(item)
  # Keys (compared case-insensitively) that are left out of embedding text.
  # TODO: configurable fields to exclude aside from the common ones below
  skipped_keys = %w[id _id uuid created_at updated_at timestamp version status active]

  item.each_with_object([]) do |(key, value), texts|
    next if skipped_keys.include?(key.to_s.downcase)
    # Only plain strings and numbers contribute; other value types are skipped.
    next unless value.is_a?(String) || value.is_a?(Numeric)

    text = value.to_s
    texts << text unless text.strip.empty?
  end
end
260
+
261
+ # Generic context item formatting using configurable contexts
262
def format_context_item(item)
  # Hashes go through the configurable formatter; strings are returned as-is;
  # everything else is stringified.
  return format_hash_item(item) if item.is_a?(Hash)
  return item if item.is_a?(String)

  item.to_s
end
269
+
270
+ private
271
+
272
+ # Resolve context configuration for an item
273
def resolve_context_config(item, context_type)
  # User-supplied configs are layered over the built-in 'default' entry.
  user_configs = @options[:context_configs] || {}
  configs = default_context_configs.merge(user_configs)
  fallback = configs['default']
  return fallback if configs.empty?

  # An explicit context_type wins; otherwise the type is detected from the item.
  type_key = context_type || detect_context_type(item)
  configs[type_key] || fallback
end
280
+
281
+ # Extract fields configured for embeddings
282
def extract_configured_fields(item, config)
  field_names = config[:embedding_fields]
  return nil unless field_names&.any?

  # Each field is looked up under the configured key first, then under its
  # Symbol form; fields with no (or a falsy) value are dropped.
  field_names.each_with_object([]) do |field, found|
    value = item[field] || item[field.to_sym]
    found << value if value
  end
end
287
+
288
+ # Format a hash item using context configuration
289
def format_hash_item(item)
  config = resolve_context_config(item, nil)
  template = config[:format]

  if template
    format_data = build_format_data(item, config)
    # Only attempt the template when at least one field value was found.
    rendered = apply_format_template(template, format_data) if format_data.any?
    return rendered if rendered
  end

  # No template, no usable data, or the template could not be applied.
  fallback_format_hash(item)
end
298
+
299
+ # Build format data from item fields
300
# Collects the values a format template may reference.
#
# @param item [Hash] context item; keys may be Strings or Symbols
# @param config [Hash] context configuration; :fields is optional
# @return [Hash{Symbol => Object}] field name => value for every field that
#   has a truthy value in +item+
def build_format_data(item, config)
  # :fields may be missing from a user-supplied config; treat that like an
  # empty list instead of raising NoMethodError on nil.
  configured_fields = Array(config[:fields])
  field_names = configured_fields.any? ? configured_fields : item.keys.map(&:to_s)

  field_names.each_with_object({}) do |field, format_data|
    value = item[field] || item[field.to_sym]
    format_data[field.to_sym] = value if value
  end
end
311
+
312
+ # Apply format template with error handling
313
# Fills a format template with the collected field values.
#
# @param template [String] format string using named references such as %<title>s
# @param format_data [Hash{Symbol => Object}] values for the named references
# @return [String, nil] the rendered text, or nil when the template cannot be
#   applied so the caller can fall back to generic formatting
def apply_format_template(template, format_data)
  template % format_data
rescue KeyError, ArgumentError, TypeError
  # KeyError: the template names a field that is absent.
  # ArgumentError / TypeError: malformed directive or a value that does not
  # fit it (e.g. %<count>d given "abc"). All of these mean the template is
  # unusable for this item.
  nil
end
318
+
319
+ # Detect context type from item structure
320
# Determines which context configuration applies to an item.
#
# An explicit type field on the item wins, checked in the order "type",
# "context_type", "model_type" (the last is lower-cased). Each is accepted
# under either a String or a Symbol key, matching how the rest of this class
# reads item fields. Without an explicit type, user-configured contexts are
# matched by field names; otherwise 'default' is returned.
#
# @param item [Object] context item, normally a Hash
# @return [String] the context type name
def detect_context_type(item)
  return 'default' unless item.is_a?(Hash)

  %w[type context_type model_type].each do |type_key|
    explicit = item[type_key] || item[type_key.to_sym]
    next unless explicit

    return type_key == 'model_type' ? explicit.to_s.downcase : explicit.to_s
  end

  # No explicit type: try to match the item's fields against user configs.
  context_configs = @options[:context_configs] || {}
  return match_context_by_fields(item, context_configs) if context_configs.any?

  'default'
end
335
+
336
+ # Match context type based on configured field patterns
337
def match_context_by_fields(item, context_configs)
  # Compare on stringified key names so Symbol-keyed items can match too.
  field_names = item.keys.map { |key| key.to_s }
  find_best_field_match(field_names, context_configs) || 'default'
end
342
+
343
+ # Find the best matching context configuration
344
def find_best_field_match(item_fields, context_configs)
  # Fold over the configs carrying [best type so far, its score]. A candidate
  # replaces the leader only with a score of at least 0.5 that is strictly
  # higher, so on a tie the earlier config wins.
  leader, _leader_score = context_configs.reduce([nil, 0]) do |(best_type, best_score), (context_type, config)|
    candidate_fields = config[:fields]
    next [best_type, best_score] unless candidate_fields&.any?

    score = calculate_field_match_score(item_fields, candidate_fields)
    if score >= 0.5 && score > best_score
      [context_type, score]
    else
      [best_type, best_score]
    end
  end

  leader
end
360
+
361
+ # Calculate field matching score
362
def calculate_field_match_score(item_fields, config_fields)
  return 0 if config_fields.empty?

  # Fraction of the configured fields that the item also has.
  shared_fields = item_fields & config_fields
  shared_fields.size.fdiv(config_fields.size)
end
368
+
369
+ # Fallback formatting for hash items
370
def fallback_format_hash(item, format_data = nil)
  # Prefer the collected format data when given; otherwise render the item itself.
  source = format_data || item
  pairs = source.map { |key, value| format('%s: %s', key, value) }
  pairs.join(', ')
end
374
+ end