rouge-lexer-yara 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7986144d60452ab72d171f7c645cf810e5922fd0a85496800c1d9911f569703c
4
+ data.tar.gz: 5333ed55cbc923af93359aeeac6342cf2cb2f84e5ce3ea4dfc6a3115811a622d
5
+ SHA512:
6
+ metadata.gz: e36b57cbcb23e585570ba4c83fe0873fb32221e18cb2ae02906af1aec2d7fa4c8e4149cd3da0791f83ec387adbe119ee9f83464d56e585ac08562d885de3d932
7
+ data.tar.gz: f8ddcb2702465c873cb81ab56e095162b462c71511f6c8974fb9e2396931524adb329af6a6744f92b0b353133ee54d208da04842923d872a953c593a5f33c346
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rouge'
4
+ require File.expand_path('../lexers/yara', __dir__)
@@ -0,0 +1,139 @@
1
+ # -*- coding: utf-8 -*- #
2
+ # frozen_string_literal: true
3
+
4
+ module Rouge
5
+ module Lexers
6
+ class YARA < RegexLexer
7
+ title 'YARA'
8
+ desc 'YARA malware pattern-matching rule language'
9
+ tag 'yara'
10
+ aliases 'yar'
11
+ filenames '*.yar', '*.yara'
12
+ mimetypes 'text/x-yara'
13
+
14
+ def self.detect?(text)
15
+ return true if text =~ /\A\s*(?:rule|import|include)\b/
16
+ end
17
+
18
+ def self.keywords
19
+ @keywords ||= Set.new %w(
20
+ all and any at condition contains defined endswith
21
+ entrypoint filesize for global icontains iendswith
22
+ iequals in istartswith matches meta none not of
23
+ or private startswith strings them
24
+ )
25
+ end
26
+
27
+ def self.keywords_declaration
28
+ @keywords_declaration ||= Set.new %w(
29
+ rule import include
30
+ )
31
+ end
32
+
33
+ def self.keywords_pseudo
34
+ @keywords_pseudo ||= Set.new %w(
35
+ ascii base64 base64wide fullword nocase wide xor
36
+ )
37
+ end
38
+
39
+ def self.builtins
40
+ @builtins ||= Set.new %w(
41
+ int8 int16 int32 uint8 uint16 uint32
42
+ int8be int16be int32be uint8be uint16be uint32be
43
+ )
44
+ end
45
+
46
+ state :root do
47
+ rule %r/\s+/, Text::Whitespace
48
+ rule %r(//.*$), Comment::Single
49
+ rule %r(/\*), Comment::Multiline, :multiline_comment
50
+
51
+ # section labels: meta: strings: condition:
52
+ rule %r/(meta|strings|condition)(\s*)(:)/ do
53
+ groups Name::Label, Text::Whitespace, Punctuation
54
+ end
55
+
56
+ # hex string entry: = { hex_content }
57
+ rule %r/(=)(\s*)(\{)/m do
58
+ groups Operator, Text::Whitespace, Str::Other
59
+ push :hex_string
60
+ end
61
+
62
+ # double-quoted strings
63
+ rule %r/"/, Str::Double, :string
64
+
65
+ # regex literals
66
+ rule %r(/) do
67
+ token Str::Regex
68
+ push :regex
69
+ end
70
+
71
+ # variables: $ident, #ident, @ident, !ident, bare $
72
+ rule %r/[#@!]\w+/, Name::Variable
73
+ rule %r/\$\w*/, Name::Variable
74
+
75
+ # hex numbers
76
+ rule %r/0x[0-9a-fA-F]+/, Num::Hex
77
+ # decimal numbers with optional size suffix
78
+ rule %r/\d+(?:KB|MB)?/, Num::Integer
79
+
80
+ # range operator
81
+ rule %r/\.\./, Operator
82
+
83
+ # multi-character operators
84
+ rule %r/==|!=|<=|>=|<<|>>/, Operator
85
+ # single-character operators
86
+ rule %r/[+\-*\\%&|^~<>=]/, Operator
87
+
88
+ # punctuation
89
+ rule %r/[{}()\[\]:.,]/, Punctuation
90
+
91
+ # word classification
92
+ rule %r/\w+/ do |m|
93
+ if self.class.keywords_declaration.include?(m[0])
94
+ token Keyword::Declaration
95
+ elsif self.class.keywords_pseudo.include?(m[0])
96
+ token Keyword::Pseudo
97
+ elsif m[0] == 'true' || m[0] == 'false'
98
+ token Keyword::Constant
99
+ elsif self.class.builtins.include?(m[0])
100
+ token Name::Builtin
101
+ elsif self.class.keywords.include?(m[0])
102
+ token Keyword
103
+ else
104
+ token Name
105
+ end
106
+ end
107
+ end
108
+
109
+ state :multiline_comment do
110
+ rule %r([*]/), Comment::Multiline, :pop!
111
+ rule %r([^*]+), Comment::Multiline
112
+ rule %r([*]), Comment::Multiline
113
+ end
114
+
115
+ state :string do
116
+ rule %r/\\./, Str::Escape
117
+ rule %r/"/, Str::Double, :pop!
118
+ rule %r/[^\\"]+/, Str::Double
119
+ end
120
+
121
+ state :hex_string do
122
+ rule %r/\s+/, Text::Whitespace
123
+ rule %r/\}/, Str::Other, :pop!
124
+ rule %r(//.*$), Comment::Single
125
+ rule %r(/\*), Comment::Multiline, :multiline_comment
126
+ rule %r/[0-9a-fA-F?]{2}/, Str::Other
127
+ rule %r/~/, Operator
128
+ rule %r/\[[\d\-]*\]/, Str::Other
129
+ rule %r/[|()]/, Punctuation
130
+ end
131
+
132
+ state :regex do
133
+ rule %r/\\./, Str::Regex
134
+ rule %r(/[is]*), Str::Regex, :pop!
135
+ rule %r([^\\/]+), Str::Regex
136
+ end
137
+ end
138
+ end
139
+ end
data/spec/demos/yara ADDED
@@ -0,0 +1,19 @@
1
+ import "pe"
2
+
3
+ rule ExampleRule : demo
4
+ {
5
+ meta:
6
+ author = "Rouge"
7
+ score = 75
8
+ is_demo = true
9
+
10
+ strings:
11
+ $text = "malware" ascii wide nocase
12
+ $hex = { 4D 5A 90 00 ?? [4-8] 50 45 }
13
+ $re = /md5: [0-9a-fA-F]{32}/i
14
+
15
+ condition:
16
+ filesize < 5MB and
17
+ uint16(0) == 0x5A4D and
18
+ any of them
19
+ }
@@ -0,0 +1,376 @@
1
+ /*
2
+ YARA Visual Sample
3
+ Covers a wide range of YARA syntax elements.
4
+ */
5
+
6
+ // Import statements
7
+ import "pe"
8
+ import "elf"
9
+ import "hash"
10
+ import "math"
11
+ import "dotnet"
12
+ import "time"
13
+ import "console"
14
+ import "string"
15
+
16
+ // Include statement
17
+ include "other_rules.yar"
18
+ include "./includes/more_rules.yar"
19
+
20
+ // Simple rule with no strings
21
+ rule AlwaysFalse
22
+ {
23
+ condition:
24
+ false
25
+ }
26
+
27
+ // Basic rule with tags and metadata
28
+ rule BasicExample : Tag1 Tag2 ExampleTag
29
+ {
30
+ meta:
31
+ description = "A basic YARA rule example"
32
+ author = "Rouge Lexer Test"
33
+ date = "2024-01-15"
34
+ version = 1
35
+ is_test = true
36
+ score = 75
37
+
38
+ strings:
39
+ $text1 = "This is a test string"
40
+ $text2 = "Another string with escapes: \t\n\r\\\""
41
+ $text3 = "Hex escape: \x4D\x5A"
42
+
43
+ condition:
44
+ any of them
45
+ }
46
+
47
+ // String modifiers
48
+ rule StringModifiers
49
+ {
50
+ strings:
51
+ $wide_str = "Borland" wide
52
+ $ascii_str = "text" ascii
53
+ $wide_ascii = "mixed" wide ascii
54
+ $nocase_str = "foobar" nocase
55
+ $fullword_str = "domain" fullword
56
+ $xor_str = "This program cannot" xor
57
+ $xor_range = "encrypted" xor(0x01-0xff)
58
+ $base64_str = "This program cannot" base64
59
+ $base64wide_str = "secret" base64wide
60
+ $base64_custom = "data" base64("!@#$%^&*(){}[].,|ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstu")
61
+ $combined = "malware" wide ascii nocase fullword
62
+ $private_str = "hidden" private
63
+
64
+ condition:
65
+ any of them
66
+ }
67
+
68
+ // Hexadecimal strings with various features
69
+ rule HexStrings
70
+ {
71
+ strings:
72
+ // Basic hex string
73
+ $hex1 = { E2 34 A1 C8 23 FB }
74
+
75
+ // Wild-cards
76
+ $hex2 = { E2 34 ?? C8 A? FB }
77
+
78
+ // Not operator
79
+ $hex3 = { F4 23 ~00 62 B4 }
80
+ $hex4 = { F4 23 ~?0 62 B4 }
81
+
82
+ // Jumps
83
+ $hex5 = { F4 23 [4-6] 62 B4 }
84
+ $hex6 = { FE 39 45 [6] 89 00 }
85
+ $hex7 = { FE 39 45 [10-] 89 00 }
86
+ $hex8 = { FE 39 45 [-] 89 00 }
87
+
88
+ // Alternatives
89
+ $hex9 = { F4 23 ( 62 B4 | 56 ) 45 }
90
+ $hex10 = { F4 23 ( 62 B4 | 56 | 45 ?? 67 ) 45 }
91
+
92
+ condition:
93
+ any of ($hex*)
94
+ }
95
+
96
+ // Regular expressions
97
+ rule RegexPatterns
98
+ {
99
+ strings:
100
+ $re1 = /md5: [0-9a-fA-F]{32}/
101
+ $re2 = /state: (on|off)/
102
+ $re3 = /foo/i
103
+ $re4 = /bar./s
104
+ $re5 = /baz./is
105
+ $re6 = /https?:\/\/[^\s]+/
106
+ $re7 = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/
107
+
108
+ condition:
109
+ any of them
110
+ }
111
+
112
+ // Global rule
113
+ global rule SizeLimit
114
+ {
115
+ condition:
116
+ filesize < 2MB
117
+ }
118
+
119
+ // Private rule
120
+ private rule IsExecutable
121
+ {
122
+ condition:
123
+ uint16(0) == 0x5A4D
124
+ }
125
+
126
+ // Global private rule
127
+ global private rule NotTooLarge
128
+ {
129
+ condition:
130
+ filesize < 10MB
131
+ }
132
+
133
+ // Counting strings and offsets
134
+ rule CountingAndOffsets
135
+ {
136
+ strings:
137
+ $a = "dummy1"
138
+ $b = "dummy2"
139
+
140
+ condition:
141
+ #a == 6 and #b > 10 and
142
+ $a at 100 and
143
+ $b in (100..filesize) and
144
+ @a[1] < 1000 and
145
+ !a[1] > 5
146
+ }
147
+
148
+ // Data access functions
149
+ rule DataAccess
150
+ {
151
+ condition:
152
+ // MZ signature and PE signature
153
+ uint16(0) == 0x5A4D and
154
+ uint32(uint32(0x3C)) == 0x00004550 and
155
+ int8(0) == 0x4D and
156
+ int16(0) == 0x5A4D and
157
+ int32(0) != 0 and
158
+ uint8(0) == 0x4D and
159
+ int8be(0) == 0x4D and
160
+ int16be(0) == 0x4D5A and
161
+ int32be(0) != 0 and
162
+ uint8be(0) == 0x4D and
163
+ uint16be(0) == 0x4D5A and
164
+ uint32be(0) != 0
165
+ }
166
+
167
+ // Sets of strings and quantifiers
168
+ rule StringSets
169
+ {
170
+ strings:
171
+ $foo1 = "foo1"
172
+ $foo2 = "foo2"
173
+ $foo3 = "foo3"
174
+ $bar1 = "bar1"
175
+ $bar2 = "bar2"
176
+
177
+ condition:
178
+ 2 of ($foo*) and
179
+ all of them and
180
+ any of ($bar*) and
181
+ none of ($foo*) or
182
+ 1 of ($*)
183
+ }
184
+
185
+ // Anonymous strings
186
+ rule AnonymousStrings
187
+ {
188
+ strings:
189
+ $ = "dummy1"
190
+ $ = "dummy2"
191
+
192
+ condition:
193
+ 1 of them
194
+ }
195
+
196
+ // For loops and iterators
197
+ rule ForLoops
198
+ {
199
+ strings:
200
+ $a = "dummy1"
201
+ $b = "dummy2"
202
+
203
+ condition:
204
+ for all i in (1,2,3) : ( @a[i] + 10 == @b[i] ) and
205
+ for all i in (1..#a) : ( @a[i] < 100 ) and
206
+ for any of ($a,$b) : ( $ at 0 ) and
207
+ for 2 i in (1..#a) : ( @a[i] < 100 )
208
+ }
209
+
210
+ // Using PE module
211
+ rule PEModuleExample
212
+ {
213
+ condition:
214
+ pe.entry_point == 0x1000 and
215
+ pe.number_of_sections > 3 and
216
+ pe.characteristics & pe.DLL != 0
217
+ }
218
+
219
+ // String operators in conditions
220
+ rule StringOperators
221
+ {
222
+ condition:
223
+ pe.sections[0].name contains ".text" and
224
+ pe.sections[0].name icontains ".TEXT" and
225
+ pe.sections[0].name startswith "." and
226
+ pe.sections[0].name istartswith "." and
227
+ pe.sections[0].name endswith "ext" and
228
+ pe.sections[0].name iendswith "EXT" and
229
+ pe.sections[0].name iequals ".TEXT" and
230
+ pe.sections[0].name matches /\.[a-z]+/
231
+ }
232
+
233
+ // Iterating over module data
234
+ rule ModuleIteration
235
+ {
236
+ condition:
237
+ for any section in pe.sections : (
238
+ section.name == ".text" and
239
+ section.characteristics & 0x20000000 != 0
240
+ )
241
+ }
242
+
243
+ // Using defined operator
244
+ rule DefinedExample
245
+ {
246
+ condition:
247
+ defined pe.entry_point and
248
+ pe.entry_point == 0x1000
249
+ }
250
+
251
+ // Arithmetic and bitwise operators
252
+ rule Operators
253
+ {
254
+ strings:
255
+ $a = "test"
256
+
257
+ condition:
258
+ #a > 5 + 3 and
259
+ #a < 100 - 50 and
260
+ #a != 10 * 2 and
261
+ filesize \ 1024 < 100 and
262
+ filesize % 512 == 0 and
263
+ uint8(0) & 0xFF == 0x4D and
264
+ uint8(0) | 0x00 == 0x4D and
265
+ uint8(0) ^ 0x00 == 0x4D and
266
+ ~uint8(0) != 0 and
267
+ uint8(0) << 8 == 0x4D00 and
268
+ uint16(0) >> 8 == 0x4D
269
+ }
270
+
271
+ // Rule references
272
+ rule Rule1
273
+ {
274
+ strings:
275
+ $a = "dummy1"
276
+
277
+ condition:
278
+ $a
279
+ }
280
+
281
+ rule Rule2
282
+ {
283
+ strings:
284
+ $a = "dummy2"
285
+
286
+ condition:
287
+ $a and Rule1
288
+ }
289
+
290
+ rule MainRule
291
+ {
292
+ condition:
293
+ any of (Rule*)
294
+ }
295
+
296
+ // Ranges in string sets
297
+ rule RangesInSets
298
+ {
299
+ strings:
300
+ $a1 = "test1"
301
+ $a2 = "test2"
302
+ $b1 = "other"
303
+
304
+ condition:
305
+ all of ($a*) in (filesize-500..filesize) and
306
+ any of ($a*, $b*) in (1000..2000) and
307
+ any of ($a*) at 0 and
308
+ #a1 in (filesize-500..filesize) == 2
309
+ }
310
+
311
+ // Hash module usage
312
+ rule HashExample
313
+ {
314
+ condition:
315
+ hash.md5(0, filesize) == "d41d8cd98f00b204e9800998ecf8427e" and
316
+ hash.sha256(0, filesize) != ""
317
+ }
318
+
319
+ // Math module usage
320
+ rule MathExample
321
+ {
322
+ condition:
323
+ math.entropy(0, filesize) > 7.0
324
+ }
325
+
326
+ // For loop with text strings (YARA 4.3+)
327
+ rule ForWithStrings
328
+ {
329
+ condition:
330
+ for any s in ("71b36345516e076a0663e0bea97759e4",
331
+ "1e7f7edeb06de02f2c2a9319de99e033") : (
332
+ hash.md5(0, filesize) == s
333
+ )
334
+ }
335
+
336
+ // Time module
337
+ rule TimeExample
338
+ {
339
+ condition:
340
+ time.now() > 1704067200
341
+ }
342
+
343
+ // Complex real-world-like rule
344
+ rule SuspiciousPEFile : malware suspicious
345
+ {
346
+ meta:
347
+ description = "Detects suspicious PE files"
348
+ author = "Security Analyst"
349
+ severity = "high"
350
+ score = 85
351
+ in_the_wild = true
352
+
353
+ strings:
354
+ $mz = { 4D 5A }
355
+ $pe = { 50 45 00 00 }
356
+ $str1 = "CreateRemoteThread" ascii wide
357
+ $str2 = "VirtualAllocEx" fullword
358
+ $str3 = "WriteProcessMemory" nocase
359
+ $url = /https?:\/\/[a-z0-9\.\-]+\.[a-z]{2,}/i
360
+ $suspicious_hex = { 68 ?? ?? ?? ?? FF 15 ?? ?? ?? ?? 85 C0 74 [2-6] }
361
+
362
+ condition:
363
+ $mz at 0 and
364
+ $pe in (0..1024) and
365
+ filesize < 5MB and
366
+ 2 of ($str*) and
367
+ not $url and
368
+ (
369
+ pe.entry_point < 0x1000 or
370
+ pe.number_of_sections > 7 or
371
+ for any section in pe.sections : (
372
+ section.name == ".packed" or
373
+ math.entropy(section.raw_data_offset, section.raw_data_size) > 7.5
374
+ )
375
+ )
376
+ }
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rouge-lexer-yara
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Sean Whalen
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2026-03-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rouge
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '3.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '3.0'
27
+ description: A Rouge plugin providing syntax highlighting for YARA malware pattern-matching
28
+ rule language
29
+ email:
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - lib/rouge/lexer/yara.rb
35
+ - lib/rouge/lexers/yara.rb
36
+ - spec/demos/yara
37
+ - spec/visual/samples/yara
38
+ homepage: https://github.com/seanthegeek/rouge-lexer-yara
39
+ licenses:
40
+ - MIT
41
+ metadata:
42
+ source_code_uri: https://github.com/seanthegeek/rouge-lexer-yara
43
+ bug_tracker_uri: https://github.com/seanthegeek/rouge-lexer-yara/issues
44
+ post_install_message:
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '3.0'
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubygems_version: 3.4.20
60
+ signing_key:
61
+ specification_version: 4
62
+ summary: Rouge lexer for YARA
63
+ test_files: []