yara-ffi 3.1.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,101 +1,190 @@
1
1
  module Yara
2
+ # Public: Represents a single rule match result from YARA scanning.
3
+ #
4
+ # A ScanResult contains information about a YARA rule that matched during
5
+ # scanning, including the rule name, metadata, and string patterns. This
6
+ # class provides access to rule information extracted from both the YARA-X
7
+ # API and parsed rule source code.
8
+ #
9
+ # Currently, metadata and string parsing is implemented by parsing the
10
+ # original rule source code using regular expressions. This is a temporary
11
+ # solution until YARA-X provides more complete API access to rule internals.
12
+ #
13
+ # Examples
14
+ #
15
+ # # Typically created by Scanner during scanning
16
+ # scanner.scan(data) do |result|
17
+ # puts "Matched rule: #{result.rule_name}"
18
+ # puts "Author: #{result.rule_meta[:author]}"
19
+ # puts "Patterns: #{result.rule_strings.keys}"
20
+ # end
2
21
  class ScanResult
3
- RULE_MATCHING = 1
4
- RULE_NOT_MATCHING = 2
5
-
6
- META_FLAGS_LAST_IN_RULE = 1
7
-
8
- META_TYPE_INTEGER = 1
9
- # META_TYPE_STRING = 2
10
- META_TYPE_BOOLEAN = 3
11
-
12
- STRING_FLAGS_LAST_IN_RULE = 0
13
-
14
- attr_reader :callback_type, :rule
15
-
16
- def initialize(callback_type, rule, user_data)
17
- @callback_type = callback_type
18
- @rule = rule
19
- @rule_meta = extract_rule_meta
20
- @rule_strings = extract_rule_strings
21
- @user_data_number = user_data[:number]
22
- end
23
-
24
- attr_reader :rule_meta, :rule_strings, :user_data_number
25
-
26
- def rule_name
27
- @rule[:identifier]
28
- end
29
-
30
- def scan_complete?
31
- callback_type == SCAN_FINISHED
32
- end
33
-
34
- def rule_outcome?
35
- [RULE_MATCHING, RULE_NOT_MATCHING].include?(callback_type)
22
+ # Public: The name identifier of the matched rule.
23
+ attr_reader :rule_name
24
+
25
+ # Public: FFI pointer to the underlying YRX_RULE structure.
26
+ attr_reader :rule_ptr
27
+
28
+ # Public: Hash of metadata key-value pairs extracted from the rule.
29
+ attr_reader :rule_meta
30
+
31
+ # Public: Hash of string pattern names and their values from the rule.
32
+ attr_reader :rule_strings
33
+
34
+ # Public: Initialize a new ScanResult.
35
+ #
36
+ # This constructor is typically called internally by Scanner when a rule
37
+ # matches during scanning. It extracts available information from both
38
+ # the YARA-X API and the original rule source code.
39
+ #
40
+ # rule_name - A String containing the rule identifier/name
41
+ # rule_ptr - An FFI Pointer to the YRX_RULE structure
42
+ # is_match - A Boolean indicating if this represents a match (default true)
43
+ # rule_source - An optional String containing the original rule source for parsing
44
+ #
45
+ # Examples
46
+ #
47
+ # # Typically created internally by Scanner
48
+ # result = ScanResult.new("MyRule", rule_ptr, true, rule_source)
49
+ def initialize(rule_name, rule_ptr, is_match = true, rule_source = nil)
50
+ @rule_name = rule_name
51
+ @rule_ptr = rule_ptr
52
+ @is_match = is_match
53
+ @rule_source = rule_source
54
+ @rule_meta = {}
55
+ @rule_strings = {}
56
+
57
+ # For now, parse metadata and strings from source as a temporary solution
58
+ if @rule_source
59
+ parse_metadata_from_source
60
+ parse_strings_from_source
61
+ end
36
62
  end
37
63
 
64
+ # Public: Check if this result represents a rule match.
65
+ #
66
+ # Examples
67
+ #
68
+ # if result.match?
69
+ # puts "Rule #{result.rule_name} matched!"
70
+ # end
71
+ #
72
+ # Returns a Boolean indicating whether the rule matched.
38
73
  def match?
39
- callback_type == RULE_MATCHING
74
+ @is_match
40
75
  end
41
76
 
42
- private
43
-
44
- def extract_rule_meta
45
- metas = {}
46
- reading_metas = true
47
- meta_index = 0
48
- meta_pointer = @rule[:metas]
49
- while reading_metas do
50
- meta = YrMeta.new(meta_pointer + meta_index * YrMeta.size)
51
- metas.merge!(meta_as_hash(meta))
52
- flags = meta[:flags]
53
- if flags == META_FLAGS_LAST_IN_RULE
54
- reading_metas = false
55
- else
56
- meta_index += 1
77
+ # Internal: Parse metadata from the original rule source code.
78
+ #
79
+ # This method uses regular expressions to extract key-value pairs from
80
+ # the rule's meta section. It handles string, boolean, and numeric values
81
+ # with basic type conversion. This is a temporary implementation until
82
+ # YARA-X provides direct API access to rule metadata.
83
+ #
84
+ # Examples
85
+ #
86
+ # # Given rule source with:
87
+ # # meta:
88
+ # # author = "security_team"
89
+ # # version = 1
90
+ # # active = true
91
+ #
92
+ # result.rule_meta[:author] # => "security_team"
93
+ # result.rule_meta[:version] # => 1
94
+ # result.rule_meta[:active] # => true
95
+ #
96
+ # Returns nothing (modifies @rule_meta hash).
97
+ def parse_metadata_from_source
98
+ return unless @rule_source
99
+
100
+ # Extract metadata section more carefully
101
+ if @rule_source =~ /meta:\s*(.*?)(?:strings:|condition:)/m
102
+ meta_section = $1.strip
103
+
104
+ # Parse each line in the meta section
105
+ meta_section.split("\n").each do |line|
106
+ line = line.strip
107
+ next if line.empty?
108
+
109
+ if line =~ /^(\w+)\s*=\s*(.+)$/
110
+ key, value = $1, $2
111
+ parsed_value = parse_meta_value(value.strip)
112
+ @rule_meta[key.to_sym] = parsed_value
113
+ end
57
114
  end
58
115
  end
59
- metas
60
116
  end
61
117
 
62
- def extract_rule_strings
63
- strings = {}
64
- reading_strings = true
65
- string_index = 0
66
- string_pointer = @rule[:strings]
67
- while reading_strings do
68
- string = YrString.new(string_pointer + string_index * YrString.size)
69
- string_length = string[:length]
70
- flags = string[:flags]
71
- if flags == STRING_FLAGS_LAST_IN_RULE
72
- reading_strings = false
73
- else
74
- strings.merge!(string_as_hash(string)) unless string_length == 0
75
- string_index += 1
118
+ # Internal: Parse string patterns from the original rule source code.
119
+ #
120
+ # This method uses regular expressions to extract pattern definitions from
121
+ # the rule's strings section. It captures both the pattern variable names
122
+ # (like $string1) and their values, cleaning up quotes and regex delimiters.
123
+ # This is a temporary implementation until YARA-X provides direct API access.
124
+ #
125
+ # Examples
126
+ #
127
+ # # Given rule source with:
128
+ # # strings:
129
+ # # $text = "hello world"
130
+ # # $regex = /pattern[0-9]+/
131
+ # # $hex = { 41 42 43 }
132
+ #
133
+ # result.rule_strings[:$text] # => "hello world"
134
+ # result.rule_strings[:$regex] # => "pattern[0-9]+"
135
+ # result.rule_strings[:$hex] # => "{ 41 42 43 }"
136
+ #
137
+ # Returns nothing (modifies @rule_strings hash).
138
+ def parse_strings_from_source
139
+ return unless @rule_source
140
+
141
+ # Extract strings section more carefully
142
+ if @rule_source =~ /strings:\s*(.*?)(?:condition:)/m
143
+ strings_section = $1.strip
144
+
145
+ # Parse each line in the strings section
146
+ strings_section.split("\n").each do |line|
147
+ line = line.strip
148
+ next if line.empty?
149
+
150
+ if line =~ /^(\$\w+)\s*=\s*(.+)$/
151
+ name, pattern = $1, $2
152
+ # Clean up the pattern (remove quotes, regex delimiters)
153
+ cleaned_pattern = pattern.strip.gsub(/^["\/]|["\/]$/, '')
154
+ @rule_strings[name.to_sym] = cleaned_pattern
155
+ end
76
156
  end
77
157
  end
78
- strings
79
- end
80
-
81
- def meta_as_hash(meta)
82
- value = meta_value(meta[:string], meta[:integer], meta[:type])
83
- { meta[:identifier].to_sym => value }
84
- end
85
-
86
- def string_as_hash(yr_string)
87
- string_pointer = yr_string[:string]
88
- string_identifier = yr_string[:identifier]
89
- { string_identifier.to_sym => string_pointer.read_string }
90
158
  end
91
159
 
92
- def meta_value(string_value, int_value, type)
93
- if type == META_TYPE_INTEGER
94
- int_value
95
- elsif type == META_TYPE_BOOLEAN
96
- int_value == 1
160
+ # Internal: Parse and convert metadata values to appropriate Ruby types.
161
+ #
162
+ # This method handles basic type conversion for metadata values extracted
163
+ # from rule source code. It recognizes quoted strings, boolean literals,
164
+ # and unsigned decimal integers (digits only), converting them to Ruby types; any other value is returned unchanged.
165
+ #
166
+ # value - A String containing the raw metadata value from rule source
167
+ #
168
+ # Examples
169
+ #
170
+ # parse_meta_value('"hello"') # => "hello"
171
+ # parse_meta_value('true') # => true
172
+ # parse_meta_value('42') # => 42
173
+ # parse_meta_value('other') # => "other"
174
+ #
175
+ # Returns the parsed value in the appropriate Ruby type.
176
+ def parse_meta_value(value)
177
+ case value
178
+ when /^".*"$/
179
+ value[1...-1] # Remove quotes
180
+ when /^true$/i
181
+ true
182
+ when /^false$/i
183
+ false
184
+ when /^\d+$/
185
+ value.to_i
97
186
  else
98
- string_value
187
+ value
99
188
  end
100
189
  end
101
190
  end
@@ -0,0 +1,224 @@
1
+ module Yara
2
+ # Public: Collection of ScanResult objects from YARA scanning operations.
3
+ #
4
+ # ScanResults acts as an enumerable container for individual rule matches,
5
+ # providing convenient methods for accessing and querying scan results. It
6
+ # supports standard collection operations and offers specialized methods for
7
+ # common YARA use cases like checking for any matches or extracting rule names.
8
+ #
9
+ # This class is typically returned by Scanner#scan when no block is provided,
10
+ # containing all rules that matched during the scanning operation.
11
+ #
12
+ # Examples
13
+ #
14
+ # results = scanner.scan(data)
15
+ #
16
+ # if results.matched?
17
+ # puts "Found #{results.size} matches"
18
+ # results.each { |match| puts match.rule_name }
19
+ # end
20
+ #
21
+ # rule_names = results.matching_rules
22
+ # first_match = results.first
23
+ class ScanResults
24
+ include Enumerable
25
+
26
+ # Public: Initialize a new ScanResults collection.
27
+ #
28
+ # Creates a results collection (empty unless an Array is given) that can be populated with ScanResult
29
+ # objects. This is typically called internally by Scanner during scanning
30
+ # operations.
31
+ #
32
+ # results - An optional Array of ScanResult objects (default: empty array)
33
+ #
34
+ # Examples
35
+ #
36
+ # # Typically created internally by Scanner
37
+ # results = ScanResults.new
38
+ # results << scan_result
39
+ def initialize(results = [])
40
+ @results = results
41
+ end
42
+
43
+ # Public: Enumerate over all scan results.
44
+ #
45
+ # Implements the Enumerable interface, allowing standard collection methods
46
+ # like map, select, reject, etc. to be used on the results collection.
47
+ #
48
+ # block - Block that receives each ScanResult object
49
+ #
50
+ # Examples
51
+ #
52
+ # results.each { |result| puts result.rule_name }
53
+ # matched_names = results.map(&:rule_name)
54
+ # malware_results = results.select { |r| r.rule_meta[:category] == 'malware' }
55
+ #
56
+ # Returns an Enumerator when no block is given, otherwise returns the internal results Array.
57
+ def each(&block)
58
+ @results.each(&block)
59
+ end
60
+
61
+ # Public: Add a ScanResult to this collection.
62
+ #
63
+ # This method is used internally during scanning to accumulate matching
64
+ # rules. It appends the result to the internal results array.
65
+ #
66
+ # result - A ScanResult object to add to the collection
67
+ #
68
+ # Examples
69
+ #
70
+ # results = ScanResults.new
71
+ # results << ScanResult.new("MyRule", rule_ptr)
72
+ #
73
+ # Returns the internal results Array (the value of Array#<<), not the ScanResults object.
74
+ def <<(result)
75
+ @results << result
76
+ end
77
+
78
+ # Public: Get all scan results as an array.
79
+ #
80
+ # Returns the internal array of ScanResult objects. This method is provided
81
+ # for compatibility and direct access to the underlying collection.
82
+ #
83
+ # Examples
84
+ #
85
+ # all_results = results.matches
86
+ # puts "Found #{all_results.length} matches"
87
+ #
88
+ # Returns an Array of ScanResult objects.
89
+ def matches
90
+ @results
91
+ end
92
+
93
+ # Public: Extract the names of all matching rules.
94
+ #
95
+ # This convenience method returns just the rule names from all results,
96
+ # which is commonly needed for logging, reporting, or further processing
97
+ # of scan results.
98
+ #
99
+ # Examples
100
+ #
101
+ # rule_names = results.matching_rules
102
+ # puts "Matched: #{rule_names.join(', ')}"
103
+ #
104
+ # Returns an Array of String rule names.
105
+ def matching_rules
106
+ @results.map(&:rule_name)
107
+ end
108
+
109
+ # Public: Check if any rules matched during scanning.
110
+ #
111
+ # This is a convenience method to test whether the scan found any matches
112
+ # without needing to check the size or examine individual results.
113
+ #
114
+ # Examples
115
+ #
116
+ # if results.matched?
117
+ # puts "Scan found matches!"
118
+ # else
119
+ # puts "No matches found"
120
+ # end
121
+ #
122
+ # Returns true if there are any results, false otherwise.
123
+ def matched?
124
+ !@results.empty?
125
+ end
126
+
127
+ # Public: Alias for matched? method.
128
+ #
129
+ # Provides an alternative method name that may be more natural in some
130
+ # contexts, particularly when used in conditional expressions.
131
+ #
132
+ # Examples
133
+ #
134
+ # puts "Clean file" unless results.match?
135
+ #
136
+ # Returns true if there are any results, false otherwise.
137
+ alias_method :match?, :matched?
138
+
139
+ # Public: Get the number of matching rules.
140
+ #
141
+ # Returns the count of ScanResult objects in this collection, indicating
142
+ # how many rules matched during the scan operation.
143
+ #
144
+ # Examples
145
+ #
146
+ # puts "#{results.size} rules matched"
147
+ # alert_count = results.size
148
+ #
149
+ # Returns an Integer count of results.
150
+ def size
151
+ @results.size
152
+ end
153
+
154
+ # Public: Aliases for size method.
155
+ #
156
+ # These provide alternative method names for getting the collection size,
157
+ # matching standard Ruby collection naming; note that count overrides Enumerable#count and takes no argument or block.
158
+ alias_method :length, :size
159
+ alias_method :count, :size
160
+
161
+ # Public: Get the first scan result.
162
+ #
163
+ # Returns the first ScanResult object in the collection, or nil if the
164
+ # collection is empty. Useful when you expect only one match or want to
165
+ # examine the first match found.
166
+ #
167
+ # Examples
168
+ #
169
+ # first_match = results.first
170
+ # puts first_match.rule_name if first_match
171
+ #
172
+ # Returns a ScanResult object or nil if collection is empty.
173
+ def first
174
+ @results.first
175
+ end
176
+
177
+ # Public: Get the last scan result.
178
+ #
179
+ # Returns the last ScanResult object in the collection, or nil if the
180
+ # collection is empty. The order depends on the sequence in which rules
181
+ # matched during scanning.
182
+ #
183
+ # Examples
184
+ #
185
+ # last_match = results.last
186
+ # puts "Final match: #{last_match.rule_name}" if last_match
187
+ #
188
+ # Returns a ScanResult object or nil if collection is empty.
189
+ def last
190
+ @results.last
191
+ end
192
+
193
+ # Public: Check if the results collection is empty.
194
+ #
195
+ # Returns true if no rules matched during scanning, false otherwise.
196
+ # This is the inverse of matched? and can be useful for control flow.
197
+ #
198
+ # Examples
199
+ #
200
+ # puts "No threats detected" if results.empty?
201
+ # process_results unless results.empty?
202
+ #
203
+ # Returns true if no results exist, false otherwise.
204
+ def empty?
205
+ @results.empty?
206
+ end
207
+
208
+ # Public: Convert results to a plain array.
209
+ #
210
+ # Returns a duplicate of the internal results array, allowing manipulation
211
+ # without affecting the original ScanResults object. This is useful when
212
+ # you need to work with the results as a standard Ruby array.
213
+ #
214
+ # Examples
215
+ #
216
+ # array_copy = results.to_a
217
+ # sorted_results = results.to_a.sort_by(&:rule_name)
218
+ #
219
+ # Returns a new Array containing all ScanResult objects.
220
+ def to_a
221
+ @results.dup
222
+ end
223
+ end
224
+ end