rouge-lexer-yara 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7986144d60452ab72d171f7c645cf810e5922fd0a85496800c1d9911f569703c
4
- data.tar.gz: 5333ed55cbc923af93359aeeac6342cf2cb2f84e5ce3ea4dfc6a3115811a622d
3
+ metadata.gz: 90fc0f65203838a1ec396fb715f45539ef505dfcea1dd9440b427bb36d298d4e
4
+ data.tar.gz: 0ca8a3118c0100ea89c1de58138cbb2dc60c773834732812bf0480af994fb0bb
5
5
  SHA512:
6
- metadata.gz: e36b57cbcb23e585570ba4c83fe0873fb32221e18cb2ae02906af1aec2d7fa4c8e4149cd3da0791f83ec387adbe119ee9f83464d56e585ac08562d885de3d932
7
- data.tar.gz: f8ddcb2702465c873cb81ab56e095162b462c71511f6c8974fb9e2396931524adb329af6a6744f92b0b353133ee54d208da04842923d872a953c593a5f33c346
6
+ metadata.gz: 54ab053879d6e684b4e99357f58ef64986a07220e8abd0c823df2e30c359af9a053eec91620cc44673c1dc4bb5c9550863df72c9b5a969383ba46f3012141591
7
+ data.tar.gz: b0d6974c999fcf626b7cb6126e7a7b6e09d082acc18a21348fc59ae887b0597d438d8dc4934264f7bc19420893487eb6a35c7ca76bd20b310d08f2a3e770c1f7
@@ -15,27 +15,37 @@ module Rouge
15
15
  return true if text =~ /\A\s*(?:rule|import|include)\b/
16
16
  end
17
17
 
18
- def self.keywords
19
- @keywords ||= Set.new %w(
20
- all and any at condition contains defined endswith
21
- entrypoint filesize for global icontains iendswith
22
- iequals in istartswith matches meta none not of
23
- or private startswith strings them
24
- )
25
- end
26
-
18
+ # Rule-level and top-level declaration keywords (from writingrules.html)
27
19
  def self.keywords_declaration
28
20
  @keywords_declaration ||= Set.new %w(
29
21
  rule import include
30
22
  )
31
23
  end
32
24
 
25
+ # String modifiers only (from writingrules.html "String Modifiers" section)
33
26
  def self.keywords_pseudo
34
27
  @keywords_pseudo ||= Set.new %w(
35
- ascii base64 base64wide fullword nocase wide xor
28
+ ascii base64 base64wide fullword nocase private wide xor
29
+ )
30
+ end
31
+
32
+ # Boolean constants (from writingrules.html)
33
+ def self.keywords_constant
34
+ @keywords_constant ||= Set.new %w(
35
+ true false
36
+ )
37
+ end
38
+
39
+ # Section names, the 'global' rule modifier, and condition keywords (from writingrules.html); 'private' is classified in keywords_pseudo
40
+ def self.keywords
41
+ @keywords ||= Set.new %w(
42
+ all and any at condition contains defined endswith entrypoint
43
+ filesize for global icontains iendswith iequals in istartswith
44
+ matches meta none not of or startswith strings them
36
45
  )
37
46
  end
38
47
 
48
+ # Integer read functions (from writingrules.html "Accessing data at a given position")
39
49
  def self.builtins
40
50
  @builtins ||= Set.new %w(
41
51
  int8 int16 int32 uint8 uint16 uint32
@@ -45,7 +55,11 @@ module Rouge
45
55
 
46
56
  state :root do
47
57
  rule %r/\s+/, Text::Whitespace
58
+
59
+ # single-line comment
48
60
  rule %r(//.*$), Comment::Single
61
+
62
+ # multiline comment
49
63
  rule %r(/\*), Comment::Multiline, :multiline_comment
50
64
 
51
65
  # section labels: meta: strings: condition:
@@ -53,7 +67,8 @@ module Rouge
53
67
  groups Name::Label, Text::Whitespace, Punctuation
54
68
  end
55
69
 
56
- # hex string entry: = { hex_content }
70
+ # hex string assignment: identifier = { ... }
71
+ # Must come before the '=' single-char operator rule
57
72
  rule %r/(=)(\s*)(\{)/m do
58
73
  groups Operator, Text::Whitespace, Str::Other
59
74
  push :hex_string
@@ -62,39 +77,54 @@ module Rouge
62
77
  # double-quoted strings
63
78
  rule %r/"/, Str::Double, :string
64
79
 
65
- # regex literals
66
- rule %r(/) do
80
+ # regex literals: /pattern/flags
81
+ # Only treat '/' as a regex start when preceded by context that
82
+ # implies a value position (after '=', 'matches', or at a
83
+ # definition context). We use a heuristic: if a word char or
84
+ # closing bracket precedes '/' it is more likely division;
85
+ # otherwise enter regex. For YARA, regex literals appear only in
86
+ # string definitions ($x = /.../) or after 'matches', so we also
87
+ # require that '/' is not followed by whitespace, '/' or '*'.
88
+ rule %r((?<![0-9a-zA-Z_\)\]])/(?![/*\s])) do
67
89
  token Str::Regex
68
90
  push :regex
69
91
  end
70
92
 
71
- # variables: $ident, #ident, @ident, !ident, bare $
93
+ # string variables: $ident or bare $
94
+ rule %r/\$\w+/, Name::Variable
95
+ rule %r/\$/, Name::Variable
96
+
97
+ # count (#ident), offset (@ident), length (!ident) references
72
98
  rule %r/[#@!]\w+/, Name::Variable
73
- rule %r/\$\w*/, Name::Variable
74
99
 
75
- # hex numbers
100
+ # hexadecimal numbers (must come before decimal)
76
101
  rule %r/0x[0-9a-fA-F]+/, Num::Hex
77
- # decimal numbers with optional size suffix
102
+
103
+ # floating-point numbers must come before integers so 7.2 isn't split
104
+ rule %r/\d+\.\d+/, Num::Float
105
+
106
+ # decimal integers with optional size suffix (KB, MB)
78
107
  rule %r/\d+(?:KB|MB)?/, Num::Integer
79
108
 
80
- # range operator
109
+ # range operator (..)
81
110
  rule %r/\.\./, Operator
82
111
 
83
112
  # multi-character operators
84
113
  rule %r/==|!=|<=|>=|<<|>>/, Operator
85
- # single-character operators
86
- rule %r/[+\-*\\%&|^~<>=]/, Operator
114
+
115
+ # single-character operators (includes \ for integer division per YARA docs)
116
+ rule %r([+\-*\\/%&|^~<>=]), Operator
87
117
 
88
118
  # punctuation
89
119
  rule %r/[{}()\[\]:.,]/, Punctuation
90
120
 
91
- # word classification
121
+ # identifiers and keywords
92
122
  rule %r/\w+/ do |m|
93
123
  if self.class.keywords_declaration.include?(m[0])
94
124
  token Keyword::Declaration
95
125
  elsif self.class.keywords_pseudo.include?(m[0])
96
126
  token Keyword::Pseudo
97
- elsif m[0] == 'true' || m[0] == 'false'
127
+ elsif self.class.keywords_constant.include?(m[0])
98
128
  token Keyword::Constant
99
129
  elsif self.class.builtins.include?(m[0])
100
130
  token Name::Builtin
@@ -121,11 +151,21 @@ module Rouge
121
151
  state :hex_string do
122
152
  rule %r/\s+/, Text::Whitespace
123
153
  rule %r/\}/, Str::Other, :pop!
154
+
155
+ # Comments inside hex strings
124
156
  rule %r(//.*$), Comment::Single
125
157
  rule %r(/\*), Comment::Multiline, :multiline_comment
158
+
159
+ # Negated byte or masked nibble: ~00, ~?F, ~A?
160
+ rule %r/~[0-9a-fA-F?]{2}/, Str::Other
161
+
162
+ # Two hex nibbles or wildcards (e.g. 4D, ??, A?, ?B)
126
163
  rule %r/[0-9a-fA-F?]{2}/, Str::Other
127
- rule %r/~/, Operator
128
- rule %r/\[[\d\-]*\]/, Str::Other
164
+
165
+ # Jump ranges: [4], [4-8], [10-], [-], etc.
166
+ rule %r/\[\s*\d*\s*(?:-\s*\d*)?\s*\]/, Str::Other
167
+
168
+ # Alternatives separator and grouping
129
169
  rule %r/[|()]/, Punctuation
130
170
  end
131
171
 
data/spec/demos/yara CHANGED
@@ -1,19 +1,27 @@
1
1
  import "pe"
2
2
 
3
- rule ExampleRule : demo
3
+ rule Emotet_Dropper : trojan loader
4
4
  {
5
5
  meta:
6
- author = "Rouge"
7
- score = 75
8
- is_demo = true
6
+ description = "Detects Emotet dropper stage based on strings and PE features"
7
+ author = "Security Research Team"
8
+ date = "2024-03-01"
9
+ malware_family = "Emotet"
10
+ severity = 90
9
11
 
10
12
  strings:
11
- $text = "malware" ascii wide nocase
12
- $hex = { 4D 5A 90 00 ?? [4-8] 50 45 }
13
- $re = /md5: [0-9a-fA-F]{32}/i
13
+ $mz = { 4D 5A }
14
+ $ep_jmp = { E9 ?? ?? ?? ?? 55 8B EC }
15
+ $peb_access = { 64 A1 30 00 00 00 8B 40 0C }
16
+ $str_regkey = "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run" wide nocase
17
+ $str_cmd = "cmd.exe /c" ascii fullword
18
+ $url_pattern = /https?:\/\/[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\/[a-z0-9]{4,}/i
14
19
 
15
20
  condition:
16
- filesize < 5MB and
21
+ $mz at 0 and
22
+ filesize < 2MB and
23
+ pe.number_of_sections >= 3 and
17
24
  uint16(0) == 0x5A4D and
18
- any of them
25
+ (2 of ($str*) or $url_pattern) and
26
+ ($ep_jmp or $peb_access)
19
27
  }
@@ -1,9 +1,10 @@
1
1
  /*
2
2
  YARA Visual Sample
3
- Covers a wide range of YARA syntax elements.
3
+ Comprehensive coverage of YARA syntax for Rouge lexer testing.
4
+ All rules use realistic names and patterns from actual malware research.
4
5
  */
5
6
 
6
- // Import statements
7
+ // ── Imports and includes ─────────────────────────────────────────────────────
7
8
  import "pe"
8
9
  import "elf"
9
10
  import "hash"
@@ -12,365 +13,620 @@ import "dotnet"
12
13
  import "time"
13
14
  import "console"
14
15
  import "string"
16
+ import "cuckoo"
17
+ import "magic"
18
+ import "lnk"
15
19
 
16
- // Include statement
17
- include "other_rules.yar"
18
- include "./includes/more_rules.yar"
20
+ include "hunting/base_rules.yar"
19
21
 
20
- // Simple rule with no strings
22
+ // ── Keyword::Constant: true / false ──────────────────────────────────────────
21
23
  rule AlwaysFalse
22
24
  {
23
25
  condition:
24
26
  false
25
27
  }
26
28
 
27
- // Basic rule with tags and metadata
28
- rule BasicExample : Tag1 Tag2 ExampleTag
29
+ // ── Basic rule with tags and metadata ────────────────────────────────────────
30
+ rule Mirai_Botnet_Loader : botnet iot mirai
29
31
  {
30
32
  meta:
31
- description = "A basic YARA rule example"
32
- author = "Rouge Lexer Test"
33
+ description = "Detects Mirai botnet loader binary"
34
+ author = "Threat Intelligence Team"
33
35
  date = "2024-01-15"
34
- version = 1
35
- is_test = true
36
- score = 75
36
+ md5 = "d41d8cd98f00b204e9800998ecf8427e"
37
+ version = 2
38
+ in_the_wild = true
39
+ score = 85
37
40
 
38
41
  strings:
39
- $text1 = "This is a test string"
40
- $text2 = "Another string with escapes: \t\n\r\\\""
41
- $text3 = "Hex escape: \x4D\x5A"
42
+ $str_telnet = "SCANNER ON" ascii fullword
43
+ $str_killer = "/bin/busybox MIRAI" ascii
44
+ $str_table = "LZRD" wide ascii nocase
45
+ $xor_cfg = "CNIG" xor(0x22-0x44)
46
+ $b64_payload = "TVqQAAMAAAAEAAAA" base64
47
+ $b64w_marker = "loader" base64wide
42
48
 
43
49
  condition:
44
- any of them
50
+ filesize < 1MB and
51
+ uint32(0) == 0x464C457F and
52
+ 3 of ($str*) and
53
+ ($xor_cfg or $b64_payload or $b64w_marker)
45
54
  }
46
55
 
47
- // String modifiers
48
- rule StringModifiers
56
+ // ── Hex strings: wildcards, jumps, alternatives, negation ────────────────────
57
+ rule Cobalt_Strike_Beacon : apt backdoor cobaltstrike
49
58
  {
59
+ meta:
60
+ description = "Detects Cobalt Strike beacon shellcode and loader"
61
+ author = "Detection Engineering"
62
+ reference = "https://www.cobaltstrike.com/help-beacon"
63
+ score = 95
64
+
50
65
  strings:
51
- $wide_str = "Borland" wide
52
- $ascii_str = "text" ascii
53
- $wide_ascii = "mixed" wide ascii
54
- $nocase_str = "foobar" nocase
55
- $fullword_str = "domain" fullword
56
- $xor_str = "This program cannot" xor
57
- $xor_range = "encrypted" xor(0x01-0xff)
58
- $base64_str = "This program cannot" base64
59
- $base64wide_str = "secret" base64wide
60
- $base64_custom = "data" base64("!@#$%^&*(){}[].,|ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstu")
61
- $combined = "malware" wide ascii nocase fullword
62
- $private_str = "hidden" private
66
+ // MZ header
67
+ $mz = { 4D 5A }
68
+
69
+ // Beacon config watermark (exact)
70
+ $cfg_magic = { 00 01 BE EF 00 01 BE EF }
71
+
72
+ // Reflective loader stub with wildcards
73
+ $refl_loader = { 55 8B EC 83 EC ?? 56 57 8B 7D ?? 8B F7 }
74
+
75
+ // Sleep/jitter obfuscation pattern with jump
76
+ $sleep_mask = { FC 48 83 E4 F0 E8 [4-8] 41 51 41 50 52 51 }
77
+
78
+ // XOR decryption loop variant
79
+ $xor_loop = { 8B ?? 30 ?? 40 3B ?? 75 F? }
80
+
81
+ // Stageless config block (alternatives)
82
+ $cfg_block = { ( 69 68 69 68 | 2E 2E 2E 2E ) 00 00 00 00 }
83
+
84
+ // Negated null byte (any non-zero byte at position)
85
+ $not_null = { 4D 5A ~00 90 00 }
86
+
87
+ // Private string (matches are hidden from YARA's output but still usable in conditions)
88
+ $internal = "ReflectiveLoader" ascii private
89
+
90
+ // Regex: C2 URL pattern
91
+ $c2_url = /https?:\/\/[a-z0-9\-]+\.[a-z]{2,6}\/[a-zA-Z0-9]{4,12}\.(jpg|png|gif)/i
92
+
93
+ // Regex: named pipe pattern used by Cobalt Strike
94
+ $named_pipe = /\\\\\.\\pipe\\[a-zA-Z0-9]{4,20}/
63
95
 
64
96
  condition:
65
- any of them
97
+ $mz at 0 and
98
+ filesize < 10MB and
99
+ pe.number_of_sections > 2 and
100
+ pe.characteristics & pe.DLL == 0 and
101
+ (
102
+ $cfg_magic or
103
+ ($refl_loader and $sleep_mask) or
104
+ ($xor_loop and $cfg_block) or
105
+ $not_null
106
+ ) and
107
+ not $c2_url
66
108
  }
67
109
 
68
- // Hexadecimal strings with various features
69
- rule HexStrings
110
+ // ── String modifiers ──────────────────────────────────────────────────────────
111
+ rule Ransomware_String_Patterns : ransomware
70
112
  {
113
+ meta:
114
+ description = "Generic ransomware string indicators"
115
+ score = 75
116
+
71
117
  strings:
72
- // Basic hex string
73
- $hex1 = { E2 34 A1 C8 23 FB }
118
+ $note_wide = "YOUR FILES HAVE BEEN ENCRYPTED" wide
119
+ $note_ascii = "send bitcoin" ascii
120
+ $note_nocase = "Recovery Instructions" nocase
121
+ $ext_full = ".locked" fullword
122
+ $key_xor = "AES-256-CBC" xor
123
+ $key_xor_range = "ENCRYPTED" xor(0x01-0xff)
124
+ $b64_ransom = "bitcoin" base64
125
+ $b64w_ransom = "wallet" base64wide
126
+ $b64_alphabet = "data" base64("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
127
+ $combo = "shadow" wide ascii nocase fullword
74
128
 
75
- // Wild-cards
76
- $hex2 = { E2 34 ?? C8 A? FB }
129
+ condition:
130
+ 4 of them
131
+ }
132
+
133
+ // ── Global and private rule modifiers ────────────────────────────────────────
134
+ global rule File_Size_Gate
135
+ {
136
+ condition:
137
+ filesize < 20MB
138
+ }
139
+
140
+ private rule Is_PE_File
141
+ {
142
+ condition:
143
+ uint16(0) == 0x5A4D and
144
+ uint32(uint32(0x3C)) == 0x00004550
145
+ }
77
146
 
78
- // Not operator
79
- $hex3 = { F4 23 ~00 62 B4 }
80
- $hex4 = { F4 23 ~?0 62 B4 }
147
+ global private rule Max_Size_Strict
148
+ {
149
+ condition:
150
+ filesize < 50MB
151
+ }
81
152
 
82
- // Jumps
83
- $hex5 = { F4 23 [4-6] 62 B4 }
84
- $hex6 = { FE 39 45 [6] 89 00 }
85
- $hex7 = { FE 39 45 [10-] 89 00 }
86
- $hex8 = { FE 39 45 [-] 89 00 }
153
+ // ── Data access built-in functions ───────────────────────────────────────────
154
+ rule ELF_Backdoor_Sysrv : elf backdoor
155
+ {
156
+ meta:
157
+ description = "Detects Sysrv cryptomining backdoor ELF variant"
158
+ score = 80
87
159
 
88
- // Alternatives
89
- $hex9 = { F4 23 ( 62 B4 | 56 ) 45 }
90
- $hex10 = { F4 23 ( 62 B4 | 56 | 45 ?? 67 ) 45 }
160
+ strings:
161
+ $str_xmrig = "stratum+tcp://" ascii
162
+ $str_pool = "xmrigDaemon" ascii
163
+ $hex_drop = { 7F 45 4C 46 02 01 01 00 00 00 00 00 00 00 00 00 }
91
164
 
92
165
  condition:
93
- any of ($hex*)
166
+ uint8(0) == 0x7F and
167
+ uint8(1) == 0x45 and
168
+ uint8(2) == 0x4C and
169
+ uint8(3) == 0x46 and
170
+ int8(4) == 0x02 and
171
+ uint16(0x10) == 0x0002 and
172
+ uint32(0x14) == 0x00000003 and
173
+ int16be(0) == 0x7F45 and
174
+ int32be(0) == 0x7F454C46 and
175
+ uint8be(0) == 0x7F and
176
+ uint16be(0) == 0x7F45 and
177
+ uint32be(0) == 0x7F454C46 and
178
+ ($str_xmrig or $str_pool or $hex_drop)
94
179
  }
95
180
 
96
- // Regular expressions
97
- rule RegexPatterns
181
+ // ── Counting, offsets, and length references ─────────────────────────────────
182
+ rule PowerShell_Dropper_Heuristic : powershell dropper
98
183
  {
184
+ meta:
185
+ description = "Heuristic detection of PowerShell-based dropper activity"
186
+ score = 65
187
+
99
188
  strings:
100
- $re1 = /md5: [0-9a-fA-F]{32}/
101
- $re2 = /state: (on|off)/
102
- $re3 = /foo/i
103
- $re4 = /bar./s
104
- $re5 = /baz./is
105
- $re6 = /https?:\/\/[^\s]+/
106
- $re7 = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/
189
+ $cmd_iex = "IEX" ascii nocase
190
+ $cmd_invoke = "Invoke-Expression" nocase
191
+ $cmd_enc = "-EncodedCommand" nocase
192
+ $cmd_bypass = "ExecutionPolicy Bypass" nocase
193
+ $cmd_hidden = "-WindowStyle Hidden" nocase
194
+ $url_http = /https?:\/\/[^\s"']{12,}/
107
195
 
108
196
  condition:
109
- any of them
197
+ #cmd_iex >= 2 and
198
+ #url_http > 0 and
199
+ @cmd_iex[1] < 0x1000 and
200
+ !cmd_enc[1] > 5 and
201
+ $cmd_bypass in (0..512) and
202
+ (2 of ($cmd*))
110
203
  }
111
204
 
112
- // Global rule
113
- global rule SizeLimit
205
+ // ── Quantifiers, sets, and wildcards ─────────────────────────────────────────
206
+ rule APT_Lateral_Movement_Tools : apt lateral
114
207
  {
208
+ meta:
209
+ description = "Detects tooling associated with APT lateral movement"
210
+ score = 70
211
+
212
+ strings:
213
+ $psexec1 = "PsExec" ascii nocase
214
+ $psexec2 = "\\\\%s\\ADMIN$" ascii
215
+ $wmi1 = "WMI" ascii
216
+ $wmi2 = "SELECT * FROM Win32_Process" ascii nocase
217
+ $smb1 = "\\PIPE\\svcctl" ascii
218
+ $smb2 = "\\PIPE\\samr" ascii
219
+ $rdp1 = "mstsc" ascii nocase
220
+ $rdp2 = "Terminal Server" ascii nocase
221
+
115
222
  condition:
116
- filesize < 2MB
223
+ 2 of ($psexec*) or
224
+ all of ($wmi*) or
225
+ any of ($smb*) or
226
+ 1 of ($rdp*) or
227
+ none of ($psexec*) and
228
+ (3 of them)
117
229
  }
118
230
 
119
- // Private rule
120
- private rule IsExecutable
231
+ // ── Anonymous strings ─────────────────────────────────────────────────────────
232
+ rule Generic_Webshell : webshell
121
233
  {
234
+ meta:
235
+ description = "Generic PHP/ASP webshell detection"
236
+
237
+ strings:
238
+ $ = "eval(base64_decode(" ascii nocase
239
+ $ = "eval(gzinflate(" ascii nocase
240
+ $ = "eval(str_rot13(" ascii nocase
241
+ $ = "passthru($_" ascii
242
+ $ = "shell_exec($_" ascii
243
+
122
244
  condition:
123
- uint16(0) == 0x5A4D
245
+ 2 of them
124
246
  }
125
247
 
126
- // Global private rule
127
- global private rule NotTooLarge
248
+ // ── For loops and iterators ───────────────────────────────────────────────────
249
+ rule Multi_Stage_Loader_ForLoop : loader
128
250
  {
251
+ meta:
252
+ description = "Uses for-loops to detect multi-stage loader offsets"
253
+ score = 60
254
+
255
+ strings:
256
+ $stage_marker = { DE AD BE EF }
257
+ $stage_end = { CA FE BA BE }
258
+
129
259
  condition:
130
- filesize < 10MB
260
+ for all i in (1..#stage_marker) : (
261
+ @stage_marker[i] < 0x1000
262
+ ) and
263
+ for any i in (1..#stage_end) : (
264
+ @stage_end[i] > 0x400
265
+ ) and
266
+ for 2 i in (1,2,3) : (
267
+ @stage_marker[i] + 16 == @stage_end[i]
268
+ ) and
269
+ for any s in ("deadbeef", "cafebabe") : (
270
+ hash.md5(0, 4) == s
271
+ )
131
272
  }
132
273
 
133
- // Counting strings and offsets
134
- rule CountingAndOffsets
274
+ // ── Arithmetic, bitwise and string operators ──────────────────────────────────
275
+ rule PE_Suspicious_Characteristics : pe suspicious
135
276
  {
277
+ meta:
278
+ description = "Suspicious PE file with unusual section/import characteristics"
279
+ score = 55
280
+
136
281
  strings:
137
- $a = "dummy1"
138
- $b = "dummy2"
282
+ $str_virt = "VirtualAlloc" ascii
283
+ $str_write = "WriteProcessMemory" ascii
139
284
 
140
285
  condition:
141
- #a == 6 and #b > 10 and
142
- $a at 100 and
143
- $b in (100..filesize) and
144
- @a[1] < 1000 and
145
- !a[1] > 5
286
+ Is_PE_File and
287
+ pe.number_of_sections > 5 + 2 and
288
+ pe.number_of_sections < 20 - 2 and
289
+ filesize > 1024 * 4 and
290
+ filesize % 512 == 0 and
291
+ (pe.characteristics & pe.EXECUTABLE_IMAGE) != 0 and
292
+ (pe.characteristics & pe.DLL) == 0 and
293
+ (pe.characteristics | 0x0002) == pe.characteristics and
294
+ (pe.opthdr_magic ^ 0x010B) == 0 and
295
+ ~pe.characteristics == 0xFFFF7FFF and
296
+ pe.entry_point << 0 == pe.entry_point and
297
+ pe.entry_point >> 0 == pe.entry_point and
298
+ 2 of ($str*)
146
299
  }
147
300
 
148
- // Data access functions
149
- rule DataAccess
301
+ // ── String comparison operators ───────────────────────────────────────────────
302
+ rule PE_Section_Name_Checks : pe packer
150
303
  {
304
+ meta:
305
+ description = "Detects common packer section names in PE files"
306
+ score = 50
307
+
151
308
  condition:
152
- // MZ signature and PE signature
153
- uint16(0) == 0x5A4D and
154
- uint32(uint32(0x3C)) == 0x00004550 and
155
- int8(0) == 0x4D and
156
- int16(0) == 0x5A4D and
157
- int32(0) != 0 and
158
- uint8(0) == 0x4D and
159
- int8be(0) == 0x4D and
160
- int16be(0) == 0x4D5A and
161
- int32be(0) != 0 and
162
- uint8be(0) == 0x4D and
163
- uint16be(0) == 0x4D5A and
164
- uint32be(0) != 0
309
+ Is_PE_File and
310
+ for any section in pe.sections : (
311
+ section.name contains "UPX" or
312
+ section.name icontains "upx" or
313
+ section.name startswith ".aspack" or
314
+ section.name istartswith ".ASP" or
315
+ section.name endswith "0" or
316
+ section.name iendswith "PACK" or
317
+ section.name iequals ".text" or
318
+ section.name matches /^\.[a-z]{1,5}[0-9]$/
319
+ )
165
320
  }
166
321
 
167
- // Sets of strings and quantifiers
168
- rule StringSets
322
+ // ── defined operator ──────────────────────────────────────────────────────────
323
+ rule PE_With_Exports : pe export
169
324
  {
170
- strings:
171
- $foo1 = "foo1"
172
- $foo2 = "foo2"
173
- $foo3 = "foo3"
174
- $bar1 = "bar1"
175
- $bar2 = "bar2"
325
+ meta:
326
+ description = "PE file with suspicious exported function names"
327
+ score = 60
176
328
 
177
329
  condition:
178
- 2 of ($foo*) and
179
- all of them and
180
- any of ($bar*) and
181
- none of ($foo*) or
182
- 1 of ($*)
330
+ Is_PE_File and
331
+ defined pe.number_of_exports and
332
+ pe.number_of_exports > 0 and
333
+ pe.exports("ReflectiveDll_Inject")
183
334
  }
184
335
 
185
- // Anonymous strings
186
- rule AnonymousStrings
336
+ // ── PE module usage ───────────────────────────────────────────────────────────
337
+ rule Signed_But_Suspicious_PE : pe signed
187
338
  {
339
+ meta:
340
+ description = "PE file that is signed but matches suspicious import patterns"
341
+ score = 65
342
+
188
343
  strings:
189
- $ = "dummy1"
190
- $ = "dummy2"
344
+ $susp_import1 = "CreateRemoteThread" ascii
345
+ $susp_import2 = "NtUnmapViewOfSection" ascii
346
+ $susp_import3 = "ZwWriteVirtualMemory" ascii
191
347
 
192
348
  condition:
193
- 1 of them
349
+ Is_PE_File and
350
+ pe.is_signed and
351
+ pe.number_of_signatures >= 1 and
352
+ pe.imports("kernel32.dll", "VirtualAllocEx") and
353
+ pe.imports("kernel32.dll", "WriteProcessMemory") and
354
+ pe.machine == pe.MACHINE_I386 and
355
+ pe.subsystem == pe.SUBSYSTEM_WINDOWS_GUI and
356
+ pe.is_32bit() and
357
+ pe.imphash() != "" and
358
+ 2 of ($susp_import*)
194
359
  }
195
360
 
196
- // For loops and iterators
197
- rule ForLoops
361
+ // ── ELF module usage ──────────────────────────────────────────────────────────
362
+ rule ELF_Packed_Binary : elf packer
198
363
  {
364
+ meta:
365
+ description = "Detects ELF binaries with suspicious packing indicators"
366
+ score = 55
367
+
199
368
  strings:
200
- $a = "dummy1"
201
- $b = "dummy2"
369
+ $upx_sig = { 55 50 58 21 }
202
370
 
203
371
  condition:
204
- for all i in (1,2,3) : ( @a[i] + 10 == @b[i] ) and
205
- for all i in (1..#a) : ( @a[i] < 100 ) and
206
- for any of ($a,$b) : ( $ at 0 ) and
207
- for 2 i in (1..#a) : ( @a[i] < 100 )
372
+ uint8(0) == 0x7F and
373
+ elf.machine == elf.EM_X86_64 and
374
+ elf.type == elf.ET_EXEC and
375
+ elf.number_of_sections < 4 and
376
+ for any section in elf.sections : (
377
+ section.name == "" or
378
+ section.flags & elf.SHF_EXECINSTR != 0
379
+ ) and
380
+ $upx_sig
208
381
  }
209
382
 
210
- // Using PE module
211
- rule PEModuleExample
383
+ // ── Hash module usage ─────────────────────────────────────────────────────────
384
+ rule Known_Bad_Hash_Emotet : emotet trojan
212
385
  {
386
+ meta:
387
+ description = "Matches known Emotet sample by hash"
388
+ score = 100
389
+
213
390
  condition:
214
- pe.entry_point == 0x1000 and
215
- pe.number_of_sections > 3 and
216
- pe.characteristics & pe.DLL != 0
391
+ hash.md5(0, filesize) == "5e4b6d8e4a3f9c2b1a7e0d3c6f9b2e5a" or
392
+ hash.sha1(0, filesize) == "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0" or
393
+ hash.sha256(0, filesize) == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" or
394
+ hash.crc32(0, 512) == 0xDEADBEEF or
395
+ hash.checksum32(0, filesize) > 0x1000000
217
396
  }
218
397
 
219
- // String operators in conditions
220
- rule StringOperators
398
+ // ── Math module usage ─────────────────────────────────────────────────────────
399
+ rule High_Entropy_PE_Section : pe entropy packed
221
400
  {
401
+ meta:
402
+ description = "PE file with a high-entropy section indicating compression or encryption"
403
+ score = 60
404
+
222
405
  condition:
223
- pe.sections[0].name contains ".text" and
224
- pe.sections[0].name icontains ".TEXT" and
225
- pe.sections[0].name startswith "." and
226
- pe.sections[0].name istartswith "." and
227
- pe.sections[0].name endswith "ext" and
228
- pe.sections[0].name iendswith "EXT" and
229
- pe.sections[0].name iequals ".TEXT" and
230
- pe.sections[0].name matches /\.[a-z]+/
406
+ Is_PE_File and
407
+ for any section in pe.sections : (
408
+ math.entropy(section.raw_data_offset, section.raw_data_size) > 7.2
409
+ ) and
410
+ math.mean(0, 512) > 100.0 and
411
+ math.deviation(0, 512, math.MEAN_BYTES) > 64.0 and
412
+ math.in_range(math.entropy(0, filesize), 6.5, 8.0)
231
413
  }
232
414
 
233
- // Iterating over module data
234
- rule ModuleIteration
415
+ // ── dotnet module usage ───────────────────────────────────────────────────────
416
+ rule DotNet_Loader_Heuristic : dotnet loader
235
417
  {
418
+ meta:
419
+ description = "Detects .NET loader assemblies with suspicious class/method names"
420
+ score = 70
421
+
236
422
  condition:
237
- for any section in pe.sections : (
238
- section.name == ".text" and
239
- section.characteristics & 0x20000000 != 0
423
+ dotnet.is_dotnet and
424
+ dotnet.number_of_classes > 0 and
425
+ for any cls in dotnet.classes : (
426
+ cls.fullname contains "Loader" or
427
+ cls.fullname contains "Injector" or
428
+ for any method in cls.methods : (
429
+ method.name contains "Execute" or
430
+ method.name contains "Inject" or
431
+ method.name contains "Decrypt"
432
+ )
240
433
  )
241
434
  }
242
435
 
243
- // Using defined operator
244
- rule DefinedExample
436
+ // ── Time module usage ─────────────────────────────────────────────────────────
437
+ rule TimeCheck_Example : timebased
245
438
  {
439
+ meta:
440
+ description = "Rule that is only valid after a certain date (demonstration)"
441
+
246
442
  condition:
247
- defined pe.entry_point and
248
- pe.entry_point == 0x1000
443
+ time.now() > 1704067200
249
444
  }
250
445
 
251
- // Arithmetic and bitwise operators
252
- rule Operators
446
+ // ── Console module usage ──────────────────────────────────────────────────────
447
+ rule Debug_Console_Example
253
448
  {
449
+ meta:
450
+ description = "Demonstration of console module debug output"
451
+
254
452
  strings:
255
- $a = "test"
453
+ $mz = { 4D 5A }
256
454
 
257
455
  condition:
258
- #a > 5 + 3 and
259
- #a < 100 - 50 and
260
- #a != 10 * 2 and
261
- filesize \ 1024 < 100 and
262
- filesize % 512 == 0 and
263
- uint8(0) & 0xFF == 0x4D and
264
- uint8(0) | 0x00 == 0x4D and
265
- uint8(0) ^ 0x00 == 0x4D and
266
- ~uint8(0) != 0 and
267
- uint8(0) << 8 == 0x4D00 and
268
- uint16(0) >> 8 == 0x4D
456
+ $mz at 0 and
457
+ console.log("Checking PE file") and
458
+ console.log("Entropy:", math.entropy(0, filesize)) and
459
+ console.hex("First bytes:", uint32(0))
269
460
  }
270
461
 
271
- // Rule references
272
- rule Rule1
462
+ // ── String module usage ───────────────────────────────────────────────────────
463
+ rule String_Module_Example : string
273
464
  {
274
- strings:
275
- $a = "dummy1"
465
+ meta:
466
+ description = "Demonstrates string module functions"
276
467
 
277
468
  condition:
278
- $a
469
+ string.length("hello world") == 11 and
470
+ string.to_int("1337") == 1337 and
471
+ string.to_int("0xff", 16) == 255
279
472
  }
280
473
 
281
- rule Rule2
474
+ // ── Cuckoo module usage ───────────────────────────────────────────────────────
475
+ rule Cuckoo_Suspicious_Activity : cuckoo behavioral
282
476
  {
283
- strings:
284
- $a = "dummy2"
477
+ meta:
478
+ description = "Detects suspicious behavioral patterns from Cuckoo sandbox analysis"
479
+ score = 75
285
480
 
286
481
  condition:
287
- $a and Rule1
482
+ cuckoo.network.http_request(/evil\.example\.com\/payload/) and
483
+ cuckoo.network.dns_lookup(/evil\.example\.com/) and
484
+ cuckoo.network.tcp(/192\.168\.1\.1/, 4444) and
485
+ cuckoo.filesystem.file_access(/\\AppData\\Roaming\\[a-z]{8}\.exe/) and
486
+ cuckoo.registry.key_access(/HKCU\\Software\\Microsoft\\Windows\\CurrentVersion\\Run/) and
487
+ cuckoo.sync.mutex(/Global\\[A-F0-9]{32}/)
288
488
  }
289
489
 
290
- rule MainRule
490
+ // ── Magic module usage ────────────────────────────────────────────────────────
491
+ rule Magic_PDF_Dropper : pdf dropper
291
492
  {
493
+ meta:
494
+ description = "Detects PDF files with embedded executable content"
495
+ score = 70
496
+
497
+ strings:
498
+ $pdf_js = "/JavaScript" ascii
499
+ $pdf_aa = "/AA" ascii
500
+ $pdf_embed = "/EmbeddedFile" ascii
501
+
292
502
  condition:
293
- any of (Rule*)
503
+ magic.mime_type() == "application/pdf" and
504
+ magic.type() contains "PDF document" and
505
+ 2 of ($pdf*)
294
506
  }
295
507
 
296
- // Ranges in string sets
297
- rule RangesInSets
508
+ // ── LNK module usage ──────────────────────────────────────────────────────────
509
+ rule Malicious_LNK_Phishing : lnk phishing
298
510
  {
299
- strings:
300
- $a1 = "test1"
301
- $a2 = "test2"
302
- $b1 = "other"
511
+ meta:
512
+ description = "Detects malicious LNK files used in phishing campaigns"
513
+ score = 80
303
514
 
304
515
  condition:
305
- all of ($a*) in (filesize-500..filesize) and
306
- any of ($a*, $b*) in (1000..2000) and
307
- any of ($a*) at 0 and
308
- #a1 in (filesize-500..filesize) == 2
516
+ lnk.is_lnk and
517
+ lnk.file_size < 1MB and
518
+ lnk.link_info.has_volume_id and
519
+ (
520
+ lnk.command_line_arguments contains "powershell" or
521
+ lnk.command_line_arguments contains "cmd.exe" or
522
+ lnk.command_line_arguments contains "mshta"
523
+ ) and
524
+ (lnk.flags & lnk.HAS_ARGUMENTS) != 0
309
525
  }
310
526
 
311
- // Hash module usage
312
- rule HashExample
527
+ // ── Ranges in string sets ─────────────────────────────────────────────────────
528
+ rule Process_Injection_Sequence : injection
313
529
  {
530
+ meta:
531
+ description = "Detects classic process injection API sequence indicators"
532
+ score = 80
533
+
534
+ strings:
535
+ $api1 = "OpenProcess" ascii fullword
536
+ $api2 = "VirtualAllocEx" ascii fullword
537
+ $api3 = "WriteProcessMemory" ascii fullword
538
+ $api4 = "CreateRemoteThread" ascii fullword
539
+ $api5 = "NtCreateThreadEx" ascii fullword
540
+ $api6 = "RtlCreateUserThread" ascii fullword
541
+ $alt1 = "VirtualAlloc" ascii fullword
542
+ $alt2 = "LoadLibraryA" ascii fullword
543
+
314
544
  condition:
315
- hash.md5(0, filesize) == "d41d8cd98f00b204e9800998ecf8427e" and
316
- hash.sha256(0, filesize) != ""
545
+ Is_PE_File and
546
+ all of ($api1, $api2, $api3) in (0..filesize) and
547
+ any of ($api4, $api5, $api6) in (0..filesize) and
548
+ #api1 in (filesize-0x10000..filesize) == 0 and
549
+ any of ($alt*) at 0
317
550
  }
318
551
 
319
- // Math module usage
320
- rule MathExample
552
+ // ── Rule references ───────────────────────────────────────────────────────────
553
+ rule Loader_Stage_One : loader stage1
321
554
  {
555
+ meta:
556
+ description = "First stage loader: drops and executes a second payload"
557
+ score = 55
558
+
559
+ strings:
560
+ $dropper_path = "%TEMP%\\" ascii
561
+ $exec_cmd = "CreateProcess" ascii fullword
562
+
322
563
  condition:
323
- math.entropy(0, filesize) > 7.0
564
+ Is_PE_File and
565
+ $dropper_path and $exec_cmd
324
566
  }
325
567
 
326
- // For loop with text strings (YARA 4.3+)
327
- rule ForWithStrings
568
+ rule Loader_Stage_Two : loader stage2
328
569
  {
570
+ meta:
571
+ description = "Second stage loader that injects into a target process"
572
+ score = 75
573
+
574
+ strings:
575
+ $inject_api = "WriteProcessMemory" ascii fullword
576
+ $alloc_api = "VirtualAllocEx" ascii fullword
577
+
329
578
  condition:
330
- for any s in ("71b36345516e076a0663e0bea97759e4",
331
- "1e7f7edeb06de02f2c2a9319de99e033") : (
332
- hash.md5(0, filesize) == s
333
- )
579
+ Is_PE_File and
580
+ $inject_api and $alloc_api
334
581
  }
335
582
 
336
- // Time module
337
- rule TimeExample
583
+ rule Full_Infection_Chain : loader campaign
338
584
  {
585
+ meta:
586
+ description = "Matches a complete two-stage infection chain"
587
+ score = 90
588
+
339
589
  condition:
340
- time.now() > 1704067200
590
+ Loader_Stage_One and Loader_Stage_Two
341
591
  }
342
592
 
343
- // Complex real-world-like rule
344
- rule SuspiciousPEFile : malware suspicious
593
+ // ── Complex real-world rule ───────────────────────────────────────────────────
594
+ rule BlackEnergy_Driver : apt rootkit blackenergy
345
595
  {
346
596
  meta:
347
- description = "Detects suspicious PE files"
348
- author = "Security Analyst"
349
- severity = "high"
350
- score = 85
597
+ description = "Detects BlackEnergy v3 kernel driver component"
598
+ author = "APT Hunting Team"
599
+ reference = "https://securelist.com/be2-custom-plugins-router-abuse-and-target-profiles"
600
+ md5 = "b24c80c2e0c2b741ea4db8c7b5e79ad9"
601
+ score = 95
351
602
  in_the_wild = true
352
603
 
353
604
  strings:
354
- $mz = { 4D 5A }
355
- $pe = { 50 45 00 00 }
356
- $str1 = "CreateRemoteThread" ascii wide
357
- $str2 = "VirtualAllocEx" fullword
358
- $str3 = "WriteProcessMemory" nocase
359
- $url = /https?:\/\/[a-z0-9\.\-]+\.[a-z]{2,}/i
360
- $suspicious_hex = { 68 ?? ?? ?? ?? FF 15 ?? ?? ?? ?? 85 C0 74 [2-6] }
605
+ // Driver device name
606
+ $dev_name = "\\Device\\BlackEnergy" wide
607
+
608
+ // IOCTL dispatch signature
609
+ $ioctl_sig = { 8B FF 55 8B EC 8B 45 0C 83 E8 04 74 ?? 83 E8 04 74 ?? }
610
+
611
+ // Driver entry point pattern
612
+ $drv_entry = { 8D 4D F8 51 FF 15 ?? ?? ?? ?? 8B E5 5D C2 08 00 }
613
+
614
+ // Encrypted config blob
615
+ $enc_cfg = { 48 00 [2] 00 [2] 00 [2] 00 ?? ?? 00 00 }
616
+
617
+ // PDB path artifact
618
+ $pdb_path = "blackenergy" nocase
361
619
 
362
620
  condition:
363
- $mz at 0 and
364
- $pe in (0..1024) and
365
- filesize < 5MB and
366
- 2 of ($str*) and
367
- not $url and
621
+ uint16(0) == 0x5A4D and
622
+ uint32(uint32(0x3C)) == 0x00004550 and
623
+ filesize < 512KB and
624
+ pe.characteristics & pe.SYSTEM != 0 and
368
625
  (
369
- pe.entry_point < 0x1000 or
370
- pe.number_of_sections > 7 or
371
- for any section in pe.sections : (
372
- section.name == ".packed" or
373
- math.entropy(section.raw_data_offset, section.raw_data_size) > 7.5
374
- )
375
- )
626
+ ($dev_name and $ioctl_sig) or
627
+ ($drv_entry and $enc_cfg) or
628
+ ($pdb_path and $ioctl_sig)
629
+ ) and
630
+ pe.imphash() != "" and
631
+ math.entropy(0, filesize) > 5.0
376
632
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rouge-lexer-yara
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sean Whalen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-03-06 00:00:00.000000000 Z
11
+ date: 2026-03-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rouge
@@ -41,6 +41,8 @@ licenses:
41
41
  metadata:
42
42
  source_code_uri: https://github.com/seanthegeek/rouge-lexer-yara
43
43
  bug_tracker_uri: https://github.com/seanthegeek/rouge-lexer-yara/issues
44
+ documentation_uri: https://github.com/seanthegeek/rouge-lexer-yara#readme
45
+ homepage_uri: https://github.com/seanthegeek/rouge-lexer-yara
44
46
  post_install_message:
45
47
  rdoc_options: []
46
48
  require_paths: