yara-ffi 3.1.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/copilot-instructions.md +148 -0
- data/.github/workflows/ruby.yml +69 -17
- data/CHANGELOG.md +59 -1
- data/DEVELOPMENT.md +188 -0
- data/Dockerfile +19 -11
- data/Gemfile.lock +38 -23
- data/README.md +129 -14
- data/lib/yara/ffi.rb +300 -111
- data/lib/yara/scan_result.rb +171 -82
- data/lib/yara/scan_results.rb +224 -0
- data/lib/yara/scanner.rb +236 -48
- data/lib/yara/version.rb +5 -1
- data/lib/yara.rb +70 -15
- data/yara-ffi.gemspec +3 -3
- metadata +9 -14
- data/lib/yara/user_data.rb +0 -5
- data/lib/yara/yr_meta.rb +0 -10
- data/lib/yara/yr_namespace.rb +0 -5
- data/lib/yara/yr_rule.rb +0 -11
- data/lib/yara/yr_string.rb +0 -15
data/lib/yara/scan_result.rb
CHANGED
@@ -1,101 +1,190 @@
|
|
1
1
|
module Yara
  # Public: Represents a single rule match result from YARA scanning.
  #
  # A ScanResult holds the name of a rule, the pointer handed over by the
  # scanner, and the metadata and string patterns of that rule. Metadata and
  # string patterns are currently recovered by parsing the original rule
  # source with regular expressions; this is a temporary solution until
  # YARA-X provides more complete API access to rule internals.
  #
  # When the supplied source contains several rules, parsing is restricted to
  # the declaration whose name equals rule_name, so every result reports its
  # own metadata and strings. If that declaration cannot be located, the whole
  # source is parsed.
  #
  # Examples
  #
  #   # Typically created by Scanner during scanning
  #   scanner.scan(data) do |result|
  #     puts "Matched rule: #{result.rule_name}"
  #     puts "Author: #{result.rule_meta[:author]}"
  #     puts "Patterns: #{result.rule_strings.keys}"
  #   end
  class ScanResult
    # Internal: Start of a rule declaration at the beginning of a line,
    # with optional `private` / `global` modifiers.
    RULE_HEADER = /^[ \t]*(?:(?:private|global)[ \t]+)*rule[ \t]+\w+/

    # Internal: Body of the meta section, up to the next section keyword.
    META_SECTION = /meta:\s*(.*?)(?:strings:|condition:)/m

    # Internal: Body of the strings section, up to the condition section.
    STRINGS_SECTION = /strings:\s*(.*?)(?:condition:)/m

    # Internal: A single `key = value` line of the meta section.
    META_LINE = /\A(?<key>\w+)\s*=\s*(?<value>.+)\z/

    # Internal: A single `$name = pattern` line of the strings section.
    STRING_LINE = /\A(?<name>\$\w+)\s*=\s*(?<pattern>.+)\z/

    # Internal: Leading "text" literal (backslash escapes are skipped over).
    QUOTED_TEXT = /\A"((?:\\.|[^"\\])*)"/

    # Internal: Leading /regex/ literal (backslash escapes are skipped over).
    REGEX_LITERAL = %r{\A/((?:\\.|[^/\\])*)/}

    # Internal: Leading single-line { hex } literal, braces included.
    HEX_LITERAL = /\A(\{[^}]*\})/

    # Public: The name identifier of the matched rule.
    attr_reader :rule_name

    # Public: FFI pointer to the underlying YRX_RULE structure.
    attr_reader :rule_ptr

    # Public: Hash of metadata key-value pairs extracted from the rule.
    attr_reader :rule_meta

    # Public: Hash of string pattern names and their values from the rule.
    attr_reader :rule_strings

    # Public: Initialize a new ScanResult.
    #
    # rule_name   - A String containing the rule identifier/name
    # rule_ptr    - An FFI Pointer to the YRX_RULE structure (only stored)
    # is_match    - A Boolean indicating if this represents a match (default true)
    # rule_source - An optional String containing the original rule source;
    #               when given, metadata and strings are parsed from it
    #
    # Examples
    #
    #   # Typically created internally by Scanner
    #   result = ScanResult.new("MyRule", rule_ptr, true, rule_source)
    def initialize(rule_name, rule_ptr, is_match = true, rule_source = nil)
      @rule_name = rule_name
      @rule_ptr = rule_ptr
      @is_match = is_match
      @rule_source = rule_source
      @rule_meta = {}
      @rule_strings = {}

      # For now, parse metadata and strings from source as a temporary solution
      return unless @rule_source

      parse_metadata_from_source
      parse_strings_from_source
    end

    # Public: Check if this result represents a rule match.
    #
    # Examples
    #
    #   if result.match?
    #     puts "Rule #{result.rule_name} matched!"
    #   end
    #
    # Returns the is_match value given to the constructor.
    def match?
      @is_match
    end

    # Internal: Parse metadata from the original rule source code.
    #
    # Reads the `key = value` lines of this rule's meta section and stores
    # each value, converted by parse_meta_value, under the key as a Symbol.
    # Lines that do not look like `key = value` are skipped.
    #
    # Examples
    #
    #   # Given rule source with:
    #   # meta:
    #   #   author = "security_team"
    #   #   version = 1
    #   #   active = true
    #
    #   result.rule_meta[:author]  # => "security_team"
    #   result.rule_meta[:version] # => 1
    #   result.rule_meta[:active]  # => true
    #
    # Returns nothing (modifies @rule_meta hash).
    def parse_metadata_from_source
      return unless @rule_source

      section = own_rule_source[META_SECTION, 1]
      return unless section

      section.each_line do |line|
        entry = META_LINE.match(line.strip)
        next unless entry

        @rule_meta[entry[:key].to_sym] = parse_meta_value(entry[:value].strip)
      end
      nil
    end

    # Internal: Parse string patterns from the original rule source code.
    #
    # Reads the `$name = pattern` lines of this rule's strings section and
    # stores each pattern under its name (including the `$`) as a Symbol.
    # Quotes and regex delimiters are removed together with anything that
    # follows the literal (modifiers such as `nocase`, regex flags, trailing
    # comments); hex patterns keep their braces.
    #
    # Examples
    #
    #   # Given rule source with:
    #   # strings:
    #   #   $text = "hello world" nocase
    #   #   $regex = /pattern[0-9]+/
    #   #   $hex = { 41 42 43 }
    #
    #   result.rule_strings[:$text]  # => "hello world"
    #   result.rule_strings[:$regex] # => "pattern[0-9]+"
    #   result.rule_strings[:$hex]   # => "{ 41 42 43 }"
    #
    # Returns nothing (modifies @rule_strings hash).
    def parse_strings_from_source
      return unless @rule_source

      section = own_rule_source[STRINGS_SECTION, 1]
      return unless section

      section.each_line do |line|
        entry = STRING_LINE.match(line.strip)
        next unless entry

        @rule_strings[entry[:name].to_sym] = clean_pattern(entry[:pattern].strip)
      end
      nil
    end

    # Internal: Parse and convert metadata values to appropriate Ruby types.
    #
    # value - A String containing the raw metadata value from rule source
    #
    # Examples
    #
    #   parse_meta_value('"hello"')  # => "hello"
    #   parse_meta_value('true')     # => true
    #   parse_meta_value('42')       # => 42
    #   parse_meta_value('-7')       # => -7
    #   parse_meta_value('other')    # => "other"
    #
    # Returns the unquoted String, true/false, an Integer, or the value
    # unchanged when none of those forms apply.
    def parse_meta_value(value)
      case value
      when /\A".*"\z/
        value[1...-1] # Remove the surrounding quotes
      when /\Atrue\z/i
        true
      when /\Afalse\z/i
        false
      when /\A-?\d+\z/
        value.to_i
      else
        value
      end
    end

    private

    # Internal: Narrow the rule source down to this rule's own declaration.
    #
    # The slice starts at the `rule <rule_name>` header and stops right before
    # the next rule header, so sections of neighbouring rules are not picked
    # up by mistake. Headers are only recognised at the start of a line.
    #
    # Returns the String slice, or the complete source when the rule name is
    # blank or no declaration for it is found.
    def own_rule_source
      name = @rule_name.to_s
      return @rule_source if name.empty?

      header = /^[ \t]*(?:(?:private|global)[ \t]+)*rule[ \t]+#{Regexp.escape(name)}\b/
      declaration = header.match(@rule_source)
      return @rule_source unless declaration

      next_rule = @rule_source.index(RULE_HEADER, declaration.end(0))
      @rule_source[declaration.begin(0)...(next_rule || @rule_source.length)]
    end

    # Internal: Reduce a raw pattern definition to its bare value.
    #
    # pattern - A String holding everything to the right of `=` on a strings
    #           line, already stripped of surrounding whitespace
    #
    # Returns the inner text of a leading quoted or regex literal, a leading
    # hex literal with its braces, or otherwise the pattern with one leading
    # and one trailing quote/slash removed.
    def clean_pattern(pattern)
      case pattern
      when QUOTED_TEXT, REGEX_LITERAL, HEX_LITERAL
        Regexp.last_match(1)
      else
        pattern.gsub(/^["\/]|["\/]$/, '')
      end
    end
  end
end
|
@@ -0,0 +1,224 @@
|
|
1
|
+
module Yara
  # Public: Collection of ScanResult objects from YARA scanning operations.
  #
  # ScanResults wraps an Array of results and includes Enumerable, so the
  # usual collection methods (map, select, reject, ...) are available next to
  # a few convenience helpers such as matched? and matching_rules.
  #
  # Examples
  #
  #   results = scanner.scan(data)
  #
  #   if results.matched?
  #     puts "Found #{results.size} matches"
  #     results.each { |match| puts match.rule_name }
  #   end
  #
  #   rule_names = results.matching_rules
  #   first_match = results.first
  class ScanResults
    include Enumerable

    # Public: Initialize a new ScanResults collection.
    #
    # results - An optional Array of ScanResult objects (default: empty array).
    #           The array is stored as given, not copied.
    #
    # Examples
    #
    #   results = ScanResults.new
    #   results << scan_result
    def initialize(results = [])
      @results = results
    end

    # Public: Enumerate over all scan results.
    #
    # block - Block that receives each ScanResult object
    #
    # Examples
    #
    #   results.each { |result| puts result.rule_name }
    #   matched_names = results.map(&:rule_name)
    #   malware_results = results.select { |r| r.rule_meta[:category] == 'malware' }
    #
    # Returns an Enumerator when no block given, otherwise returns self.
    def each(&block)
      return enum_for(:each) { size } unless block

      @results.each(&block)
      self
    end

    # Public: Add a ScanResult to this collection.
    #
    # result - A ScanResult object to add to the collection
    #
    # Examples
    #
    #   results = ScanResults.new
    #   results << ScanResult.new("MyRule", rule_ptr)
    #
    # Returns self for method chaining.
    def <<(result)
      @results << result
      self
    end

    # Public: Get all scan results as an array.
    #
    # Examples
    #
    #   all_results = results.matches
    #   puts "Found #{all_results.length} matches"
    #
    # Returns the internal Array of ScanResult objects (not a copy; use to_a
    # for an independent array).
    def matches
      @results
    end

    # Public: Extract the names of all matching rules.
    #
    # Examples
    #
    #   rule_names = results.matching_rules
    #   puts "Matched: #{rule_names.join(', ')}"
    #
    # Returns an Array with the rule_name of every result, in collection order.
    def matching_rules
      @results.map(&:rule_name)
    end

    # Public: Check if any rules matched during scanning.
    #
    # Examples
    #
    #   if results.matched?
    #     puts "Scan found matches!"
    #   else
    #     puts "No matches found"
    #   end
    #
    # Returns true if there are any results, false otherwise.
    def matched?
      !@results.empty?
    end

    # Public: Alias for matched? method.
    #
    # Examples
    #
    #   puts "Clean file" unless results.match?
    #
    # Returns true if there are any results, false otherwise.
    alias_method :match?, :matched?

    # Public: Get the number of results in the collection.
    #
    # Examples
    #
    #   puts "#{results.size} rules matched"
    #   alert_count = results.size
    #
    # Returns an Integer count of results.
    def size
      @results.size
    end

    # Public: Alias for size method.
    alias_method :length, :size

    # Public: Count results, following the Enumerable#count contract.
    #
    # Without arguments this equals size. With an argument it counts the
    # results equal to it, and with a block it counts the results for which
    # the block returns a truthy value.
    #
    # Examples
    #
    #   results.count                                  # => 3
    #   results.count { |r| r.rule_meta[:severity] }   # => 1
    #
    # Returns an Integer.
    def count(*args, &block)
      @results.count(*args, &block)
    end

    # Public: Get the first scan result, or the first n results.
    #
    # args - An optional Integer n, as accepted by Array#first
    #
    # Examples
    #
    #   first_match = results.first
    #   puts first_match.rule_name if first_match
    #   top_two = results.first(2)
    #
    # Returns a ScanResult (nil if the collection is empty), or an Array of
    # at most n results when n is given.
    def first(*args)
      @results.first(*args)
    end

    # Public: Get the last scan result, or the last n results.
    #
    # args - An optional Integer n, as accepted by Array#last
    #
    # Examples
    #
    #   last_match = results.last
    #   puts "Final match: #{last_match.rule_name}" if last_match
    #
    # Returns a ScanResult (nil if the collection is empty), or an Array of
    # at most n results when n is given.
    def last(*args)
      @results.last(*args)
    end

    # Public: Check if the results collection is empty.
    #
    # This is the inverse of matched?.
    #
    # Examples
    #
    #   puts "No threats detected" if results.empty?
    #   process_results unless results.empty?
    #
    # Returns true if no results exist, false otherwise.
    def empty?
      @results.empty?
    end

    # Public: Convert results to a plain array.
    #
    # Examples
    #
    #   array_copy = results.to_a
    #   sorted_results = results.to_a.sort_by(&:rule_name)
    #
    # Returns a new Array (shallow copy) containing all ScanResult objects;
    # changing it does not affect this collection.
    def to_a
      @results.dup
    end
  end
end
|