yara-normalize 0.4.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.rdoc +69 -35
- data/lib/yara-normalize/yara-normalize.rb +153 -108
- metadata +6 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 13f41905bf9f1f9e8d8f30146578908c1752bffb95f8058cd849f985655104d0
|
|
4
|
+
data.tar.gz: f03ec512ead274a1e8990fe2d6448c2f62361e3a5729db0f0c7f24ea10dc69d9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 485d6e7e454a7ea967b11d9065f31403a7e8b7a08b9b218d674d4007f07cbe2ec702258794acf021b75b4a35627dcbe6dc4eda2d91b17f37ee392f235fdc5005
|
|
7
|
+
data.tar.gz: 2379f78b6e1b4d283a4e0f209e76a18597da2b8a8e0b4e596c85e86ad4e59a2c69c748f72e857c86186ebaa460a0e91fb3d58e7308d1308bcd7898042caccb14
|
data/README.rdoc
CHANGED
|
@@ -1,35 +1,34 @@
|
|
|
1
1
|
= yara-normalize
|
|
2
2
|
|
|
3
|
-
Normalizes
|
|
4
|
-
|
|
3
|
+
Normalizes YARA signatures into a repeatable, stable hash even when
|
|
4
|
+
non-semantic changes are made (whitespace, comments, tag ordering, variable
|
|
5
|
+
renaming, etc.).
|
|
5
6
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
with $a $b $c, etc. Then hash the result of this.
|
|
7
|
+
To enable consistent comparisons between YARA rules, a uniform fingerprinting
|
|
8
|
+
standard is applied:
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
1. *Strings section* — each string value (the part after the '=') is extracted,
|
|
11
|
+
sorted alphabetically, and the sorted list is hashed with SHA-256. Variable
|
|
12
|
+
names ($a, $mshtmlExec_1, …) are excluded from the hash so that renaming
|
|
13
|
+
does not change the fingerprint.
|
|
11
14
|
|
|
12
|
-
|
|
15
|
+
2. *Condition section* — variable references ($name, #name) are replaced with
|
|
16
|
+
positional tokens ($0, $1, …) in order of first appearance, so cosmetic
|
|
17
|
+
renames do not affect the hash. The resulting text is hashed with SHA-256.
|
|
18
|
+
|
|
19
|
+
The rule fingerprint is:
|
|
20
|
+
|
|
21
|
+
yn<VERSION>:<last-16-hex-chars-of-strings-SHA256>:<last-10-hex-chars-of-condition-SHA256>
|
|
13
22
|
|
|
14
|
-
|
|
23
|
+
Prior to version 1.0.0 the fingerprint used MD5 and carried the prefix +yn01+.
|
|
24
|
+
Since 1.0.0 the fingerprint uses SHA-256 and carries the prefix +yn02+. The
|
|
25
|
+
two identifier series are not interchangeable.
|
|
26
|
+
|
|
27
|
+
== Usage
|
|
15
28
|
|
|
16
29
|
require 'yara-normalize'
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
meta:
|
|
20
|
-
weight =1
|
|
21
|
-
strings:
|
|
22
|
-
$="wtoi" nocase
|
|
23
|
-
$ ="wtol" nocase
|
|
24
|
-
$= "wtof" nocase
|
|
25
|
-
$ = "wtodb" nocase
|
|
26
|
-
condition:
|
|
27
|
-
any of them
|
|
28
|
-
}
|
|
29
|
-
EOS
|
|
30
|
-
yn = YaraTools::YaraRule.new(sig)
|
|
31
|
-
puts yn.hash # => yn01:488085c947cb22ed:d936fceffe
|
|
32
|
-
puts yn.normalize # =>
|
|
30
|
+
|
|
31
|
+
sig = <<~EOS
|
|
33
32
|
rule DataConversion__wide : IntegerParsing DataConversion {
|
|
34
33
|
meta:
|
|
35
34
|
weight = 1
|
|
@@ -41,22 +40,57 @@ See test cases.
|
|
|
41
40
|
condition:
|
|
42
41
|
any of them
|
|
43
42
|
}
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
43
|
+
EOS
|
|
44
|
+
|
|
45
|
+
yn = YaraTools::YaraRule.new(sig)
|
|
46
|
+
|
|
47
|
+
puts yn.hash
|
|
48
|
+
# => yn02:6783b7082bed88dc:6821e3f6a3
|
|
49
|
+
|
|
50
|
+
puts yn.name # => DataConversion__wide
|
|
51
|
+
pp yn.tags # => ["IntegerParsing", "DataConversion"]
|
|
52
|
+
pp yn.meta # => {"weight"=>"1"}
|
|
53
|
+
pp yn.strings # => ["$ = \"wtoi\" nocase", ...]
|
|
54
|
+
|
|
55
|
+
puts yn.normalize
|
|
56
|
+
# => rule DataConversion__wide : IntegerParsing DataConversion {
|
|
57
|
+
# meta:
|
|
58
|
+
# weight = 1
|
|
59
|
+
# strings:
|
|
60
|
+
# $ = "wtoi" nocase
|
|
61
|
+
# $ = "wtol" nocase
|
|
62
|
+
# $ = "wtof" nocase
|
|
63
|
+
# $ = "wtodb" nocase
|
|
64
|
+
# condition:
|
|
65
|
+
# any of them
|
|
66
|
+
# }
|
|
67
|
+
|
|
68
|
+
Splitting a multi-rule file:
|
|
69
|
+
|
|
70
|
+
rules = YaraTools::Splitter.split(File.read("ruleset.yar"))
|
|
71
|
+
rules.each { |r| puts "#{r.name}: #{r.hash}" }
|
|
72
|
+
|
|
73
|
+
== Security notes
|
|
74
|
+
|
|
75
|
+
* Fingerprints use SHA-256 (as of yn02). MD5-based yn01 hashes should be
|
|
76
|
+
considered legacy and re-computed.
|
|
77
|
+
* +YaraRule#hash+ overrides Ruby's +Object#hash+. Do *not* use +YaraRule+
|
|
78
|
+
objects as Hash keys; the method returns a String fingerprint, not the
|
|
79
|
+
Integer that Ruby's Hash tables require.
|
|
47
80
|
|
|
48
81
|
== Contributing to yara-normalize
|
|
49
|
-
|
|
50
|
-
* Check out the latest master to make sure the feature hasn't been implemented
|
|
51
|
-
|
|
82
|
+
|
|
83
|
+
* Check out the latest master to make sure the feature hasn't been implemented
|
|
84
|
+
or the bug hasn't been fixed yet.
|
|
85
|
+
* Check out the issue tracker to make sure someone hasn't already requested it
|
|
86
|
+
and/or contributed it.
|
|
52
87
|
* Fork the project.
|
|
53
88
|
* Start a feature/bugfix branch.
|
|
54
89
|
* Commit and push until you are happy with your contribution.
|
|
55
|
-
* Make sure to add tests for it. This is important so I don't break it in a
|
|
56
|
-
|
|
90
|
+
* Make sure to add tests for it. This is important so I don't break it in a
|
|
91
|
+
future version unintentionally.
|
|
92
|
+
* Please try not to mess with the Rakefile, version, or history.
|
|
57
93
|
|
|
58
94
|
== Copyright
|
|
59
95
|
|
|
60
|
-
Copyright (c) 2012 chrislee35. See LICENSE.txt for
|
|
61
|
-
further details.
|
|
62
|
-
|
|
96
|
+
Copyright (c) 2012 chrislee35. See LICENSE.txt for further details.
|
|
@@ -1,110 +1,155 @@
|
|
|
1
|
-
require 'digest
|
|
2
|
-
|
|
1
|
+
require 'digest'
|
|
2
|
+
|
|
3
3
|
module YaraTools
  # Hash format version embedded in every yn-hash identifier.
  # Increment when the normalization algorithm changes so consumers can
  # detect that two hashes are not directly comparable (e.g. yn01 vs yn02).
  #
  # NOTE(review): blank/comment-only lines inside the strings and condition
  # sections no longer contribute to the fingerprint (see YaraRule#initialize).
  # Fingerprints of rules that contained such lines therefore differ from the
  # ones computed before that fix; decide whether this warrants a bump.
  VERSION = "02".freeze

  # A single parsed YARA rule together with its normalized forms.
  #
  # Readers (+name+, +tags+, +meta+, +strings+, +condition+,
  # +normalized_strings+) stay +nil+ when the text passed to +new+ does not
  # match RULE_RE. +tags+ is also +nil+ for a rule that declares no tags.
  class YaraRule
    # Grammar accepted by the parser:
    #   rule <name> [: <tags>] { [meta: ...] strings: ... condition: ... }
    # Captures, in order: name, tags, meta, strings, condition.
    # The .*? quantifiers are non-greedy so each section stops at the first
    # following delimiter keyword. A +strings:+ section is required; rules
    # that only have a condition do not match.
    # The tag list may be a single character: the earlier pattern
    # (\w[\w\s]+\w) needed at least three characters, so "rule r : x {"
    # failed to parse at all.
    RULE_RE = /rule\s+([\w\-]+)(?:\s*:\s*(\w(?:[\w\s]*\w)?))?\s*\{\s*(?:meta:\s*(.*?))?strings:\s*(.*?)\s*condition:\s*(.*?)\s*\}/m

    # A hex byte string made only of hex digits and whitespace. Hex strings
    # that contain wildcards or jumps (e.g. ??) are not matched and are left
    # untouched.
    HEX_STRING_RE = /= \{([0-9a-fA-F\s]+)\}/

    attr_reader :original, :name, :tags, :meta, :strings, :condition, :normalized_strings

    # Parse +ruletext+ (a String holding one YARA rule).
    #
    # Runs of CR/LF are collapsed to a single "\n" and lines that consist only
    # of a // comment are blanked before parsing. Trailing // comments after
    # code and /* */ block comments are NOT removed. +original+ holds this
    # cleaned text, not the caller's exact input.
    def initialize(ruletext)
      ruletext = ruletext.gsub(/[\r\n]+/, "\n").gsub(/^\s*\/\/.*$/, '')
      @original = ruletext

      # Maps a variable name (without its $ / # sigil) to its positional
      # token; filled lazily by _normalize_condition.
      @lookup_table = {}

      match = RULE_RE.match(ruletext)
      return unless match

      @name, tags, meta, strings, condition = match.captures

      # Tags are optional; split on whitespace/commas when present.
      @tags = tags.strip.split(/[,\s]+/) if tags
      @meta = parse_meta(meta)

      # Blanked comment lines and whitespace-only lines show up here as empty
      # entries. They are dropped so that they cannot leak into the
      # fingerprint (previously each one added an extra "%" to the hashed
      # text, so adding a comment between two strings changed the hash).
      @strings = strings.split("\n").map { |line| canonical_string_line(line) }.reject(&:empty?)

      # Only the value part (right of ' = ') is hashed, so renaming a
      # variable ($a -> $b) does not change the fingerprint. Sorting makes the
      # fingerprint independent of declaration order.
      @normalized_strings = @strings.map { |line| line.split(/ = /, 2)[1] || line }.sort

      @condition = condition.split("\n").map(&:strip).reject(&:empty?)
      @normalized_condition = @condition.map { |line| _normalize_condition(line) }
    end

    # Replace named variable references in a condition line with positional
    # tokens so that renaming $mshtmlExec_1 -> $a does not change the hash.
    # The sigil ($ or #) is kept; the number is assigned per name in order of
    # first appearance, so $foo and #foo share the same number.
    # NOTE: This method is intentionally prefixed with _ to signal that it is
    # an internal implementation detail; do not call it from outside this class.
    def _normalize_condition(condition)
      condition.gsub(/[\$\#]\w+/) do |reference|
        sigil = reference[0]
        # Whole identifier after the sigil (the earlier x[1, 1000] slice
        # silently truncated very long names).
        key = reference[1..-1]
        # The table only grows here, so its size is the next free index.
        @lookup_table[key] ||= @lookup_table.size.to_s
        sigil + @lookup_table[key]
      end
    end

    # Return a canonical, human-readable rendering of the rule. Tags, meta,
    # strings, and condition lines are emitted in their original order.
    # Every retained line is written, including lines that contain no word
    # character such as a lone "(" or ")" (these used to be dropped, which
    # produced an unbalanced condition).
    def normalize
      text = "rule #{@name} "
      text << ": #{@tags.join(' ')} " if @tags && !@tags.empty?
      text << "{\n"

      if @meta && !@meta.empty?
        text << " meta:\n"
        @meta.each { |k, v| text << " #{k} = #{v}\n" }
      end

      if @strings && !@strings.empty?
        text << " strings:\n"
        @strings.each { |s| text << " #{s}\n" }
      end

      if @condition && !@condition.empty?
        text << " condition:\n"
        @condition.each { |c| text << " #{c}\n" }
      end

      text << "}"
    end

    # Return a stable identifier for this rule in the form:
    #   yn<VERSION>:<strings_fingerprint>:<condition_fingerprint>
    #
    # The strings fingerprint is the last 16 hex chars of the SHA-256 digest
    # of the sorted, normalised string values joined by '%'.
    # The condition fingerprint is the last 10 hex chars of the SHA-256 digest
    # of the normalised condition lines joined by '%'.
    #
    # Only 64 bits of the strings digest and 40 bits of the condition digest
    # are kept, so treat the result as a compact identifier, not as a
    # collision-resistant cryptographic hash.
    #
    # Raises NoMethodError when the rule text did not parse, because the
    # normalized sections are nil in that case.
    #
    # SECURITY NOTE: This method is named `hash` to match the public API, but
    # it overrides Ruby's built-in Object#hash, which is expected to return an
    # Integer for use as a Hash table key. Do NOT use YaraRule objects as Hash
    # keys; use .hash (this method) only for YARA rule fingerprinting.
    def hash
      strings_digest = Digest::SHA256.hexdigest(@normalized_strings.join("%"))
      condition_digest = Digest::SHA256.hexdigest(@normalized_condition.join("%"))
      "yn#{VERSION}:#{strings_digest[-16, 16]}:#{condition_digest[-10, 10]}"
    end

    private

    # Turn the raw meta section into a Hash. Each line has the form
    # key = value; lines without '=' are ignored. Returns {} when the rule
    # has no meta section.
    def parse_meta(section)
      return {} unless section

      section.split("\n").each_with_object({}) do |line, meta|
        key, value = line.strip.split(/\s*=\s*/, 2)
        meta[key] = value if value
      end
    end

    # Canonicalize one line of the strings section: trim it, collapse the
    # whitespace around the first '=' to ' = ', and rewrite plain hex strings
    # as lower-case byte pairs separated by single spaces, so { 4D5A } and
    # { 4d 5a } come out the same.
    def canonical_string_line(line)
      line = line.strip.sub(/\s*=\s*/, " = ")
      line.gsub(HEX_STRING_RE) do |hex|
        digits = hex.delete("^0-9a-fA-F").downcase
        # /..?/ keeps a trailing odd nibble instead of silently discarding it.
        "= { #{digits.scan(/..?/).join(' ')} }"
      end
    end
  end

  # Splits a multi-rule YARA file into individual YaraRule objects.
  class Splitter
    # YaraRule::RULE_RE wrapped in one outer group so that String#scan yields
    # the complete rule text as the first capture.
    RULESET_RE = /(#{YaraRule::RULE_RE})/

    # Parse a string containing one or more YARA rules and return an Array of
    # YaraRule instances, one per rule found in +ruleset+. Text that does not
    # match the rule grammar is skipped.
    def self.split(ruleset)
      # Collapse line endings and blank whole-line // comments before
      # scanning so that comment text cannot interfere with rule boundaries.
      clean = ruleset.gsub(/[\r\n]+/, "\n").gsub(/^\s*\/\/.*$/, '')
      clean.scan(RULESET_RE).map { |captures| YaraRule.new(captures.first) }
    end
  end
end
|
metadata
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: yara-normalize
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 1.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Chris Lee
|
|
8
|
+
autorequire:
|
|
8
9
|
bindir: bin
|
|
9
10
|
cert_chain: []
|
|
10
|
-
date:
|
|
11
|
+
date: 2026-04-25 00:00:00.000000000 Z
|
|
11
12
|
dependencies:
|
|
12
13
|
- !ruby/object:Gem::Dependency
|
|
13
14
|
name: test-unit
|
|
@@ -110,6 +111,7 @@ homepage: https://github.com/chrislee35/yara-normalize
|
|
|
110
111
|
licenses:
|
|
111
112
|
- MIT
|
|
112
113
|
metadata: {}
|
|
114
|
+
post_install_message:
|
|
113
115
|
rdoc_options: []
|
|
114
116
|
require_paths:
|
|
115
117
|
- lib
|
|
@@ -124,7 +126,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
124
126
|
- !ruby/object:Gem::Version
|
|
125
127
|
version: '0'
|
|
126
128
|
requirements: []
|
|
127
|
-
rubygems_version: 3.
|
|
129
|
+
rubygems_version: 3.4.20
|
|
130
|
+
signing_key:
|
|
128
131
|
specification_version: 4
|
|
129
132
|
summary: Normalizes Yara signatures into a repeatable hash even when non-transforming
|
|
130
133
|
changes are made.
|