yara-ffi 4.0.0 → 4.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,13 +2,13 @@ module Yara
2
2
  # Public: Represents a single rule match result from YARA scanning.
3
3
  #
4
4
  # A ScanResult contains information about a YARA rule that matched during
5
- # scanning, including the rule name, metadata, and string patterns. This
6
- # class provides access to rule information extracted from both the YARA-X
7
- # API and parsed rule source code.
5
+ # scanning, including the rule name, metadata, string patterns, and detailed
6
+ # pattern match information. This class provides access to rule information
7
+ # extracted from both the YARA-X API and parsed rule source code.
8
8
  #
9
- # Currently, metadata and string parsing is implemented by parsing the
10
- # original rule source code using regular expressions. This is a temporary
11
- # solution until YARA-X provides more complete API access to rule internals.
9
+ # The enhanced version provides detailed pattern match information including
10
+ # exact offsets and lengths of each pattern match, allowing for precise
11
+ # forensic analysis and data extraction.
12
12
  #
13
13
  # Examples
14
14
  #
@@ -16,7 +16,15 @@ module Yara
16
16
  # scanner.scan(data) do |result|
17
17
  # puts "Matched rule: #{result.rule_name}"
18
18
  # puts "Author: #{result.rule_meta[:author]}"
19
- # puts "Patterns: #{result.rule_strings.keys}"
19
+ #
20
+ # # New: Access detailed pattern matches
21
+ # result.pattern_matches.each do |pattern_name, matches|
22
+ # puts "Pattern #{pattern_name}: #{matches.size} matches"
23
+ # matches.each do |match|
24
+ # matched_text = data[match.offset, match.length]
25
+ # puts " At offset #{match.offset}: '#{matched_text}'"
26
+ # end
27
+ # end
20
28
  # end
21
29
  class ScanResult
22
30
  # Public: The name identifier of the matched rule.
@@ -31,30 +39,65 @@ module Yara
31
39
  # Public: Hash of string pattern names and their values from the rule.
32
40
  attr_reader :rule_strings
33
41
 
42
+ # Public: Hash of pattern names to arrays of PatternMatch objects.
43
+ #
44
+ # This provides detailed information about exactly where each pattern
45
+ # matched in the scanned data, including offset and length information.
46
+ attr_reader :pattern_matches
47
+
48
+ # Public: Array of rule tags for categorization and organization.
49
+ #
50
+ # Tags are labels attached to rules that help categorize and organize
51
+ # rule sets. Common tags include malware family names, platforms,
52
+ # or behavior categories.
53
+ attr_reader :tags
54
+
55
+ # Public: Namespace of the rule, if defined.
56
+ #
57
+ # YARA rules can be organized into namespaces to avoid naming conflicts
58
+ # and provide logical grouping. This contains the namespace name or
59
+ # nil if the rule is in the default namespace.
60
+ attr_reader :namespace
61
+
34
62
  # Public: Initialize a new ScanResult.
35
63
  #
36
64
  # This constructor is typically called internally by Scanner when a rule
37
65
  # matches during scanning. It extracts available information from both
38
- # the YARA-X API and the original rule source code.
66
+ # the YARA-X API and the original rule source code, including detailed
67
+ # pattern match information.
39
68
  #
40
- # rule_name - A String containing the rule identifier/name
41
- # rule_ptr - An FFI Pointer to the YRX_RULE structure
42
- # is_match - A Boolean indicating if this represents a match (default true)
43
- # rule_source - An optional String containing the original rule source for parsing
69
+ # rule_name - A String containing the rule identifier/name
70
+ # rule_ptr - An FFI Pointer to the YRX_RULE structure
71
+ # is_match - A Boolean indicating if this represents a match (default true)
72
+ # rule_source - An optional String containing the original rule source for parsing
73
+ # scanned_data - An optional String containing the data that was scanned (needed for pattern matches)
44
74
  #
45
75
  # Examples
46
76
  #
47
77
  # # Typically created internally by Scanner
48
- # result = ScanResult.new("MyRule", rule_ptr, true, rule_source)
49
- def initialize(rule_name, rule_ptr, is_match = true, rule_source = nil)
78
+ # result = ScanResult.new("MyRule", rule_ptr, true, rule_source, scanned_data)
79
+ def initialize(rule_name, rule_ptr, is_match = true, rule_source = nil, scanned_data = nil)
50
80
  @rule_name = rule_name
51
81
  @rule_ptr = rule_ptr
52
82
  @is_match = is_match
53
83
  @rule_source = rule_source
84
+ @scanned_data = scanned_data
54
85
  @rule_meta = {}
55
86
  @rule_strings = {}
87
+ @pattern_matches = {}
88
+ @tags = []
89
+ @namespace = nil
56
90
 
57
- # For now, parse metadata and strings from source as a temporary solution
91
+ # Extract information using YARA-X API when rule pointer is available
92
+ if @rule_ptr && !@rule_ptr.null?
93
+ # TODO: Re-enable structured metadata after fixing union handling
94
+ # extract_structured_metadata
95
+ extract_tags
96
+ extract_namespace
97
+ extract_pattern_matches
98
+ end
99
+
100
+ # Parse metadata and strings from source (primary method for now)
58
101
  if @rule_source
59
102
  parse_metadata_from_source
60
103
  parse_strings_from_source
@@ -74,6 +117,376 @@ module Yara
74
117
  @is_match
75
118
  end
76
119
 
120
+ # Public: Get all matches for a specific pattern by name.
121
+ #
122
+ # This method returns an array of PatternMatch objects for the specified
123
+ # pattern identifier, or an empty array if the pattern didn't match or
124
+ # doesn't exist.
125
+ #
126
+ # pattern_name - A String or Symbol identifying the pattern (e.g., "$text1")
127
+ #
128
+ # Examples
129
+ #
130
+ # # Get matches for a specific pattern
131
+ # matches = result.matches_for_pattern("$suspicious_string")
132
+ # matches.each { |m| puts "Found at offset #{m.offset}" }
133
+ #
134
+ # Returns an Array of PatternMatch objects.
135
def matches_for_pattern(pattern_name)
  # Look up the recorded matches for one pattern identifier.
  #
  # pattern_name - A String or Symbol (e.g. "$text1"); nil is tolerated.
  #
  # Returns the Array stored for that pattern, or an empty Array when the
  # pattern is unknown, had no entry, or pattern_name is nil.

  # A nil name cannot identify a pattern. Answer "no matches" instead of
  # letting nil.to_sym raise NoMethodError, in line with the nil guards in
  # has_tag? and metadata_value.
  return [] if pattern_name.nil?

  # @pattern_matches is keyed by Symbol; to_sym converts Strings and is a
  # no-op for Symbols, so no separate type check is needed.
  @pattern_matches[pattern_name.to_sym] || []
end
139
+
140
+ # Public: Get the total number of pattern matches across all patterns.
141
+ #
142
+ # This convenience method counts the total matches across all patterns
143
+ # that triggered for this rule.
144
+ #
145
+ # Examples
146
+ #
147
+ # puts "Rule matched with #{result.total_matches} pattern matches"
148
+ #
149
+ # Returns an Integer count of total matches.
150
def total_matches
  # Count every recorded match across all patterns of this rule.
  #
  # Sums the sizes directly while walking the hash values, so no
  # intermediate array of sizes is built. An empty hash yields 0.
  #
  # Returns an Integer.
  @pattern_matches.each_value.sum(&:size)
end
153
+
154
+ # Public: Get all match locations as a flattened array.
155
+ #
156
+ # This method returns all pattern matches across all patterns as a single
157
+ # array, sorted by offset. Useful for getting an overview of all match
158
+ # locations in the data.
159
+ #
160
+ # Examples
161
+ #
162
+ # # Get all matches sorted by location
163
+ # all_matches = result.all_matches
164
+ # all_matches.each { |m| puts "Match at #{m.offset}" }
165
+ #
166
+ # Returns an Array of PatternMatch objects sorted by offset.
167
def all_matches
  # Merge the per-pattern match arrays into one list, then order that list
  # by each match's offset (ascending).
  #
  # Returns a new Array; the per-pattern arrays are left untouched.
  every_hit = @pattern_matches.values.flatten
  every_hit.sort_by { |hit| hit.offset }
end
170
+
171
+ # Public: Check if a specific pattern had any matches.
172
+ #
173
+ # This convenience method checks whether the specified pattern identifier
174
+ # had any matches during scanning.
175
+ #
176
+ # pattern_name - A String or Symbol identifying the pattern
177
+ #
178
+ # Examples
179
+ #
180
+ # if result.pattern_matched?("$malware_signature")
181
+ # puts "Malware signature detected!"
182
+ # end
183
+ #
184
+ # Returns a Boolean indicating whether the pattern matched.
185
def pattern_matched?(pattern_name)
  # Delegate the lookup to matches_for_pattern, then report whether the
  # returned list holds at least one (truthy) entry.
  #
  # Returns true or false.
  found = matches_for_pattern(pattern_name)
  found.any?
end
188
+
189
+ # Public: Check if the rule has a specific tag.
190
+ #
191
+ # This method checks whether the rule includes the specified tag.
192
+ # Tag comparison is case-sensitive.
193
+ #
194
+ # tag - A String representing the tag to check for
195
+ #
196
+ # Examples
197
+ #
198
+ # if result.has_tag?("malware")
199
+ # puts "This rule is tagged as malware"
200
+ # end
201
+ #
202
+ # Returns a Boolean indicating whether the rule has the tag.
203
def has_tag?(tag)
  # nil never matches. Any other value is compared by its String form
  # against the entries of @tags; the comparison is case-sensitive.
  #
  # Returns true or false.
  !tag.nil? && @tags.include?(tag.to_s)
end
207
+
208
+ # Public: Get the qualified rule name including namespace.
209
+ #
210
+ # This method returns the fully qualified rule name, including the
211
+ # namespace if present. For rules in the default namespace, this
212
+ # is the same as rule_name.
213
+ #
214
+ # Examples
215
+ #
216
+ # result.qualified_name # => "malware.suspicious_behavior"
217
+ # # or just "rule_name" if no namespace
218
+ #
219
+ # Returns a String containing the qualified rule name.
220
def qualified_name
  # Without a usable namespace (unset or empty) the bare rule name is
  # returned unchanged.
  return @rule_name if !@namespace || @namespace.empty?

  # Otherwise prefix the rule name with "<namespace>.".
  "#{@namespace}.#{@rule_name}"
end
227
+
228
+ # Public: Get a typed metadata value by key.
229
+ #
230
+ # This method provides type-safe access to metadata values, returning
231
+ # the actual Ruby type (String, Integer, Boolean, Float) instead of
232
+ # requiring manual type conversion.
233
+ #
234
+ # key - A String or Symbol identifying the metadata key
235
+ #
236
+ # Examples
237
+ #
238
+ # result.metadata_value(:severity) # => 8 (Integer)
239
+ # result.metadata_value("author") # => "Security Team" (String)
240
+ # result.metadata_value(:active) # => true (Boolean)
241
+ #
242
+ # Returns the metadata value in its native Ruby type, or nil if not found.
243
def metadata_value(key)
  # @rule_meta is read with Symbol keys, so the key is normalised with
  # to_sym. A nil key short-circuits to nil instead of being converted.
  #
  # Returns whatever is stored under the key, or nil when absent.
  @rule_meta[key.to_sym] unless key.nil?
end
247
+
248
+ # Public: Get an integer metadata value by key.
249
+ #
250
+ # This method provides a convenient way to access integer metadata
251
+ # with automatic type checking.
252
+ #
253
+ # key - A String or Symbol identifying the metadata key
254
+ #
255
+ # Examples
256
+ #
257
+ # result.metadata_int(:severity) # => 8
258
+ # result.metadata_int(:version) # => 2
259
+ #
260
+ # Returns an Integer value, or nil if key doesn't exist or isn't an integer.
261
def metadata_int(key)
  # Fetch through metadata_value and hand the value back only when it is
  # an Integer; every other case (missing key, other type) yields nil.
  found = metadata_value(key)
  found if found.is_a?(Integer)
end
265
+
266
+ # Public: Get a string metadata value by key.
267
+ #
268
+ # This method provides a convenient way to access string metadata
269
+ # with automatic type checking.
270
+ #
271
+ # key - A String or Symbol identifying the metadata key
272
+ #
273
+ # Examples
274
+ #
275
+ # result.metadata_string(:author) # => "Security Team"
276
+ # result.metadata_string(:description) # => "Detects malware"
277
+ #
278
+ # Returns a String value, or nil if key doesn't exist or isn't a string.
279
def metadata_string(key)
  # Fetch through metadata_value and hand the value back only when it is
  # a String; every other case (missing key, other type) yields nil.
  found = metadata_value(key)
  found if found.is_a?(String)
end
283
+
284
+ # Public: Get a boolean metadata value by key.
285
+ #
286
+ # This method provides a convenient way to access boolean metadata
287
+ # with automatic type checking.
288
+ #
289
+ # key - A String or Symbol identifying the metadata key
290
+ #
291
+ # Examples
292
+ #
293
+ # result.metadata_bool(:active) # => true
294
+ # result.metadata_bool(:enabled) # => false
295
+ #
296
+ # Returns a Boolean value, or nil if key doesn't exist or isn't a boolean.
297
def metadata_bool(key)
  # Only the literal values true and false count as booleans here; any
  # other value (including nil for a missing key) falls through the case
  # expression and produces nil.
  flag = metadata_value(key)
  case flag
  when true, false then flag
  end
end
301
+
302
+ # Public: Get a float metadata value by key.
303
+ #
304
+ # This method provides a convenient way to access float metadata
305
+ # with automatic type checking.
306
+ #
307
+ # key - A String or Symbol identifying the metadata key
308
+ #
309
+ # Examples
310
+ #
311
+ # result.metadata_float(:confidence) # => 0.95
312
+ # result.metadata_float(:ratio) # => 3.14
313
+ #
314
+ # Returns a Float value, or nil if key doesn't exist or isn't a float.
315
def metadata_float(key)
  # Fetch through metadata_value and hand the value back only when it is
  # a Float; Integers and every other type yield nil, as does a missing key.
  found = metadata_value(key)
  found if found.is_a?(Float)
end
319
+
320
+ # Internal: Extract detailed pattern match information using YARA-X API.
321
+ #
322
+ # This method uses the YARA-X C API to iterate through all patterns defined
323
+ # in the matched rule and collect detailed match information including exact
324
+ # offsets and lengths for each match.
325
+ #
326
+ # This replaces the need to parse pattern information from rule source code
327
+ # and provides precise forensic data about what matched and where.
328
+ #
329
+ # Returns nothing (modifies @pattern_matches hash).
330
def extract_pattern_matches
  # Fill @pattern_matches (Symbol pattern name => Array of PatternMatch)
  # by walking the rule's patterns, and each pattern's matches, through
  # the Yara::FFI iteration functions. Does nothing without a usable
  # rule pointer.
  return if !@rule_ptr || @rule_ptr.null?

  # Called once per pattern by yrx_rule_iter_patterns.
  on_pattern = proc do |pattern_ptr, _user_data|
    next if pattern_ptr.nil? || pattern_ptr.null?

    # Out-parameters that receive the identifier pointer and its length.
    name_out = ::FFI::MemoryPointer.new(:pointer)
    size_out = ::FFI::MemoryPointer.new(:size_t)

    status = Yara::FFI.yrx_pattern_identifier(pattern_ptr, name_out, size_out)
    next if status != Yara::FFI::YRX_SUCCESS

    name_ptr = name_out.get_pointer(0)
    next if name_ptr.nil? || name_ptr.null?

    # NOTE(review): the length slot is allocated as :size_t but read with
    # get_ulong; confirm the two widths agree on every supported platform.
    key = name_ptr.read_string(size_out.get_ulong(0)).to_sym

    # Register the pattern even when it turns out to have no matches.
    hits = (@pattern_matches[key] ||= [])

    # Called once per match by yrx_pattern_iter_matches.
    on_match = proc do |match_ptr, _match_user_data|
      next if match_ptr.nil? || match_ptr.null?

      record = Yara::FFI::YRX_MATCH.new(match_ptr)
      hits << PatternMatch.new(record[:offset], record[:length])
    end

    Yara::FFI.yrx_pattern_iter_matches(pattern_ptr, on_match, nil)
  end

  Yara::FFI.yrx_rule_iter_patterns(@rule_ptr, on_pattern, nil)
end
370
+
371
+ # Internal: Extract structured metadata using YARA-X API.
372
+ #
373
+ # This method uses the YARA-X C API to access rule metadata with proper
374
+ # type information, replacing the regex-based parsing approach with
375
+ # reliable structured access.
376
+ #
377
+ # Returns nothing (modifies @rule_meta hash).
378
def extract_structured_metadata
  # Copy rule metadata into @rule_meta (Symbol identifier => decoded value)
  # by iterating entries through Yara::FFI.yrx_rule_iter_metadata.
  #
  # The call to this method in initialize is commented out behind a TODO
  # about union handling, so this path is currently not exercised from there.
  #
  # Errors never escape: a failure inside one entry skips that entry, and a
  # failure of the iteration itself makes the method return nil.
  return if !@rule_ptr || @rule_ptr.null?

  on_entry = proc do |metadata_ptr, _user_data|
    next if metadata_ptr.nil? || metadata_ptr.null?

    begin
      entry = Yara::FFI::YRX_METADATA.new(metadata_ptr)
      name_ptr = entry[:identifier]
      next if name_ptr.nil? || name_ptr.null?

      key = name_ptr.read_string.to_sym
      kind = entry[:value_type]

      # Address of the :value member inside the struct; each branch below
      # reads it according to the entry's declared type.
      union_ptr = entry.pointer + entry.offset_of(:value)

      decoded =
        case kind
        when Yara::FFI::YRX_I64
          union_ptr.read_long_long
        when Yara::FFI::YRX_F64
          union_ptr.read_double
        when Yara::FFI::YRX_BOOLEAN
          union_ptr.read_char != 0
        when Yara::FFI::YRX_STRING
          text_ptr = union_ptr.read_pointer
          text_ptr.nil? || text_ptr.null? ? "" : text_ptr.read_string
        when Yara::FFI::YRX_BYTES
          blob_ptr = union_ptr.read_pointer
          if blob_ptr.nil? || blob_ptr.null?
            ""
          else
            # NOTE(review): confirm that read_size_t and read_pointer(8)
            # exist with these signatures on the pointer class in use, and
            # that the bytes value really is behind a pointer with the data
            # pointer 8 bytes in. Anything they raise is swallowed by the
            # rescue below, which silently drops the entry.
            length = blob_ptr.read_size_t
            data_ptr = blob_ptr.read_pointer(8)
            length > 0 && !data_ptr.null? ? data_ptr.read_string(length) : ""
          end
        end

      # Unknown value types leave decoded as nil and are not stored.
      @rule_meta[key] = decoded unless decoded.nil?
    rescue
      # Best effort: a problematic entry is skipped so the remaining
      # entries can still be extracted.
    end
  end

  Yara::FFI.yrx_rule_iter_metadata(@rule_ptr, on_entry, nil)
rescue
  # Best effort: if iteration itself fails, @rule_meta keeps whatever was
  # collected so far and the method returns nil.
end
435
+
436
+ # Internal: Extract rule tags using YARA-X API.
437
+ #
438
+ # This method uses the YARA-X C API to access all tags defined for the rule.
439
+ # Tags provide categorization and organization capabilities for rule sets.
440
+ #
441
+ # Returns nothing (modifies @tags array).
442
def extract_tags
  # Append every non-empty tag reported by Yara::FFI.yrx_rule_iter_tags to
  # @tags. Does nothing without a usable rule pointer.
  return if !@rule_ptr || @rule_ptr.null?

  # Called once per tag by yrx_rule_iter_tags.
  on_tag = proc do |tag_ptr, _user_data|
    unless tag_ptr.nil? || tag_ptr.null?
      begin
        label = tag_ptr.read_string
        @tags << label unless label.empty?
      rescue
        # Best effort: a tag that cannot be read is skipped so the
        # remaining tags are still collected.
      end
    end
  end

  Yara::FFI.yrx_rule_iter_tags(@rule_ptr, on_tag, nil)

  # Only takes effect if @tags is nil or false at this point; otherwise it
  # leaves @tags unchanged. Either way @tags is the method's return value.
  @tags ||= []
end
463
+
464
+ # Internal: Extract rule namespace using YARA-X API.
465
+ #
466
+ # This method uses the YARA-X C API to access the namespace that contains
467
+ # this rule. Namespaces provide logical grouping and avoid naming conflicts.
468
+ #
469
+ # Returns nothing (modifies @namespace attribute).
470
def extract_namespace
  # Store the rule's namespace in @namespace using
  # Yara::FFI.yrx_rule_namespace. @namespace is left untouched when there
  # is no usable rule pointer, when the call does not report YRX_SUCCESS,
  # or when the returned string pointer is nil/null.
  return if !@rule_ptr || @rule_ptr.null?

  # Out-parameters that receive the namespace pointer and its length.
  out_ptr = ::FFI::MemoryPointer.new(:pointer)
  out_len = ::FFI::MemoryPointer.new(:size_t)

  status = Yara::FFI.yrx_rule_namespace(@rule_ptr, out_ptr, out_len)
  return if status != Yara::FFI::YRX_SUCCESS

  text_ptr = out_ptr.get_pointer(0)
  return if text_ptr.nil? || text_ptr.null?

  # NOTE(review): the length slot is allocated as :size_t but read with
  # get_ulong; confirm the two widths agree on every supported platform.
  byte_count = out_len.get_ulong(0)

  # A zero-length namespace is recorded as nil rather than "".
  @namespace = (text_ptr.read_string(byte_count) if byte_count > 0)
rescue
  # Best effort: any error during extraction resets the namespace to nil.
  @namespace = nil
end
489
+
77
490
  # Internal: Parse metadata from the original rule source code.
78
491
  #
79
492
  # This method uses regular expressions to extract key-value pairs from