rouge-lexer-spl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 95bcba36175336b5f4bdfb541516536df46990fb43bf2530690815c758ee96bf
4
+ data.tar.gz: 55818398ba0266efba91869c7d5f508724421d5165c90a0cce98c5e285b812e2
5
+ SHA512:
6
+ metadata.gz: ef14e53f0f449fa595f51a3227c2fdef71fbd08875f056b57827a01da3f2f4ecd7b50182304fddd948ce8d19376a915e707657399f78f1f43b58bc319e8e4c45
7
+ data.tar.gz: cf377703f943a0dc81a2c4802772f82aaa382e9830db8deae252dc16b70fb93e770f954e4f6070013a5be3ff05997caab445fa73f12251d0b351cb455ae14cb5
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rouge'
4
+ require File.expand_path('../lexers/spl', __dir__)
@@ -0,0 +1,200 @@
1
+ # -*- coding: utf-8 -*- #
2
+ # frozen_string_literal: true
3
+
4
module Rouge
  module Lexers
    # Rouge lexer for the Splunk Search Processing Language (SPL).
    #
    # Bare words are classified by membership in the word sets exposed as
    # class methods below (commands, eval/stats functions, operator words,
    # constants and built-in fields). Everything else is handled by the
    # ordered rules of the +:root+ state; rule order matters because the
    # first rule that matches at the current position wins.
    class SPL < RegexLexer
      title "SPL"
      desc "Splunk Search Processing Language (SPL)"
      tag 'spl'
      aliases 'splunk', 'splunk-spl'
      filenames '*.spl', '*.splunk'
      mimetypes 'text/x-spl'

      # Content-based detection hook.
      #
      # @param text [String] the source text being analysed
      # @return [Boolean] true when a line starts with a pipe followed by a
      #   common SPL command, or when the text contains both an
      #   +index=<word>+ and a +sourcetype=+ term
      def self.detect?(text)
        # `^` is deliberately per-line here: a piped command may start any line.
        return true if text.match?(/^\s*\|\s*(?:stats|eval|table|search|where|rex|rename|fields|sort|dedup|timechart|chart|head|tail)\b/i)

        text.match?(/\bindex\s*=\s*\w+/i) && text.match?(/\bsourcetype\s*=\s*/i)
      end

      # SPL commands (from Splunk command quick reference).
      # Stored lowercase; looked up with the downcased word.
      def self.commands
        @commands ||= Set.new %w(
          abstract accum addcoltotals addinfo addtotals analyzefields
          anomalies anomalousvalue anomalydetection append appendcols
          appendpipe arules associate autoregress bin bucket bucketdir
          chart cluster cofilter collect concurrency contingency convert
          correlate datamodel dbinspect dedup delete delta diff erex eval
          eventcount eventstats extract filldown fillnull findtypes
          folderize foreach format from gauge gentimes geom geomfilter
          geostats head highlight history iconify inputcsv inputlookup
          iplocation join kmeans kvform loadjob localize localop lookup
          makecontinuous makemv makeresults map mcollect metadata
          metasearch meventcollect mpreview msearch mstats multikv
          multisearch mvcombine mvexpand nomv outlier outputcsv
          outputlookup outputtext overlap pivot predict rangemap rare
          redistribute regex reltime rename replace require rest return
          reverse rex rtorder savedsearch script run scrub search
          searchtxn selfjoin sendalert sendemail set setfields sichart
          sirare sistats sitimechart sitop sort spath stats strcat
          streamstats table tags tail timechart timewrap tojson top
          transaction transpose trendline tscollect tstats typeahead
          typelearner typer union uniq untable walklex where x11 xmlkv
          xmlunescape xpath xyseries kv
        )
      end

      # Evaluation functions (from Splunk evaluation functions reference).
      # Stored lowercase; looked up with the downcased word.
      def self.eval_functions
        @eval_functions ||= Set.new %w(
          abs acos acosh asin asinh atan atan2 atanh avg
          bit_and bit_or bit_not bit_xor bit_shift_left bit_shift_right
          case cidrmatch ceiling coalesce commands cos cosh exact exp
          false floor hypot if in ipmask isarray isbool isdouble isint
          ismv isnotnull isnull isnum isobject isstr json json_append
          json_array json_array_to_mv json_delete json_entries
          json_extend json_extract json_extract_exact json_has_key_exact
          json_keys json_object json_set json_set_exact json_valid len
          like ln log lower ltrim match max md5 min mvappend mvcount
          mvdedup mvfilter mvfind mvindex mvjoin mvmap mvrange mvsort
          mvzip mv_to_json_array now null nullif pi pow printf random
          relative_time replace round rtrim searchmatch sha1 sha256
          sha512 sigfig sin sinh split sqrt strftime strptime substr sum
          tan tanh time toarray tobool todouble toint tomv tonumber
          toobject tostring trim true typeof upper urldecode validate
        )
      end

      # Statistical and charting functions (from Splunk stats functions
      # reference). Stored lowercase; looked up with the downcased word.
      def self.stats_functions
        @stats_functions ||= Set.new %w(
          avg count distinct_count dc estdc estdc_error exactperc max
          mean median min mode perc percentile range stdev stdevp sum
          sumsq upperperc var varp first last list values earliest
          earliest_time latest latest_time per_day per_hour per_minute
          per_second rate rate_avg rate_sum sparkline
        )
      end

      # Operator keywords. Stored uppercase; looked up with the upcased word.
      def self.operator_words
        @operator_words ||= Set.new %w(
          AND OR NOT XOR IN LIKE BY AS OVER OUTPUT OUTPUTNEW WHERE
        )
      end

      # Constants. Matched case-sensitively against exactly these spellings.
      def self.constants
        @constants ||= Set.new %w(
          true false TRUE FALSE null NULL
        )
      end

      # Built-in / internal fields.
      # Stored lowercase; looked up with the downcased word.
      def self.builtin_fields
        @builtin_fields ||= Set.new %w(
          _time _raw _indextime _cd _serial _bkt _si _sourcetype
          _subsecond _kv host source sourcetype index splunk_server
          linecount punct timeendpos timestartpos eventtype tag
          date_hour date_mday date_minute date_month date_second
          date_wday date_year date_zone
        )
      end

      state :root do
        # Whitespace
        rule %r/\s+/m, Text

        # Block comments (triple backtick); must precede the macro rule.
        rule %r/```/, Comment::Multiline, :block_comment

        # Double-quoted strings
        rule %r/"/, Str::Double, :double_string

        # Single-quoted strings (field names)
        rule %r/'/, Str::Single, :single_string

        # Backtick-quoted macros/saved searches (not triple)
        rule %r/`(?!``)/, Name::Function, :backtick_string

        # Time modifiers (e.g., -24h@h, +7d@d, -30m) and snap-only forms
        # (e.g., @d). These must come BEFORE the numeric rules: the numeric
        # patterns accept a leading "-" and would otherwise consume "-24"
        # and leave "h@h" behind. The trailing (?!\w) keeps the unit letter
        # from matching the first character of a longer word.
        rule %r/[+-]\d+[smhdwqy](?:@[smhdwqy]\d?)?(?!\w)/i, Literal::Date
        rule %r/@[smhdwqy]\d?(?!\w)/i, Literal::Date

        # Numeric literals
        rule %r/-?\d+\.\d+(?:e[+-]?\d+)?/i, Num::Float
        rule %r/-?\d+(?:e[+-]?\d+)?/i, Num::Integer

        # $token$ / $field substitutions (e.g., `return $threshold`)
        rule %r/\$\w+\$?/, Name::Variable

        # Subsearch brackets, pipe, grouping, separators, and the ":" / "{}"
        # that appear in sourcetypes (cisco:asa) and spath paths (results{}).
        rule %r/[\[\](){}|,;:]/, Punctuation

        # Comparison and assignment operators: = == != <= >= < > !
        rule %r/[<>!=]=?/, Operator

        # Arithmetic operators and the "*" wildcard
        rule %r/[+\-*\/%]/, Operator

        # Range ("..") and concatenation / field-path separator (".").
        # The single dot is matched unconditionally so that dotted names
        # such as my_lookup.csv do not leave an unmatched character.
        rule %r/\.\./, Operator
        rule %r/\./, Operator

        # Words — classify by set membership
        rule %r/\w+/ do |m|
          word = m[0]
          upper = word.upcase
          lower = word.downcase
          klass = self.class

          if klass.constants.include?(word)
            token Keyword::Constant
          elsif word == upper && klass.operator_words.include?(word)
            # Explicitly uppercase operator words (AND, BY, WHERE, ...)
            token Keyword::Pseudo
          elsif klass.commands.include?(lower)
            # Checked before the case-insensitive operator lookup so that
            # the lowercase `where` command is not shadowed by WHERE.
            token Keyword
          elsif klass.operator_words.include?(upper)
            # Lowercase / mixed-case operator words (by, as, ...)
            token Keyword::Pseudo
          elsif klass.eval_functions.include?(lower) || klass.stats_functions.include?(lower)
            token Name::Builtin
          elsif klass.builtin_fields.include?(lower)
            token Name::Variable::Magic
          else
            token Name
          end
        end
      end

      # Inside ``` ... ```; a lone backtick is still comment text.
      state :block_comment do
        rule %r/```/, Comment::Multiline, :pop!
        rule %r/[^`]+/, Comment::Multiline
        rule %r/`/, Comment::Multiline
      end

      # Inside "..." with backslash escapes.
      state :double_string do
        rule %r/\\./, Str::Escape
        rule %r/"/, Str::Double, :pop!
        rule %r/[^\\"]+/, Str::Double
      end

      # Inside '...' with backslash escapes.
      state :single_string do
        rule %r/\\./, Str::Escape
        rule %r/'/, Str::Single, :pop!
        rule %r/[^\\']+/, Str::Single
      end

      # Inside `macro(...)`; the whole body is highlighted as one name.
      state :backtick_string do
        rule %r/`/, Name::Function, :pop!
        rule %r/[^`]+/, Name::Function
      end
    end
  end
end
data/spec/demos/spl ADDED
@@ -0,0 +1,12 @@
1
+ ``` This is a block comment in SPL ```
2
+ index=main sourcetype=access_combined earliest=-24h@h latest=now
3
+ | search status>=400 NOT status=200
4
+ | eval error_type=case(status>=500, "Server Error", status>=400, "Client Error", true(), "Other")
5
+ | stats count AS error_count, values(uri_path) AS paths BY error_type, host
6
+ | where error_count > 10
7
+ | eval message=if(error_count>100, "CRITICAL", "WARNING")
8
+ | eval hash=md5(host)
9
+ | rename error_count AS "Total Errors"
10
+ | sort -error_count
11
+ | table error_type, host, "Total Errors", paths, message
12
+ | head 20
@@ -0,0 +1,248 @@
1
+ ```
2
+ This is a block comment in SPL.
3
+ It can span multiple lines.
4
+ Use triple backticks to start and end.
5
+ ```
6
+
7
+ ``` Short block comment ```
8
+
9
+ index=main sourcetype=syslog host=webserver01
10
+
11
+ index=firewall sourcetype=cisco:asa action=blocked src_ip=10.0.0.*
12
+
13
+ sourcetype=access_combined status>=400
14
+
15
+ error OR fail OR critical
16
+
17
+ status=200 AND method=GET
18
+
19
+ source="/var/log/messages" NOT debug
20
+
21
+ index=main sourcetype=access_combined earliest=-24h@h latest=now
22
+ | search status>=400
23
+ | stats count BY status, host
24
+
25
+ index=main earliest=-7d@d latest=now
26
+ | timechart span=1h count BY sourcetype
27
+
28
+ index=main earliest=-30m latest=now
29
+ | eval duration=round(response_time, 2)
30
+
31
+ | eval severity=case(
32
+ status>=500, "critical",
33
+ status>=400, "error",
34
+ status>=300, "redirect",
35
+ status>=200, "success",
36
+ true(), "unknown"
37
+ )
38
+
39
+ | eval is_local=if(cidrmatch("10.0.0.0/8", src_ip), "yes", "no")
40
+
41
+ | eval combined=mvjoin(mvappend(field1, field2), ",")
42
+
43
+ | eval extracted=replace(uri_path, "^/api/v\d+/", "")
44
+
45
+ | eval request_time=strftime(_time, "%Y-%m-%d %H:%M:%S")
46
+ | eval epoch=strptime("2024-01-15 08:30:00", "%Y-%m-%d %H:%M:%S")
47
+
48
+ | eval checksum=md5(raw_data)
49
+ | eval hash=sha256(username)
50
+
51
+ | eval avg_val=avg(field1, field2, field3)
52
+ | eval max_val=max(bytes_in, bytes_out)
53
+ | eval min_val=min(response_time)
54
+
55
+ | eval item_count=mvcount(values)
56
+ | eval unique_vals=mvdedup(myfield)
57
+ | eval filtered=mvfilter(match(mvfield, "error"))
58
+ | eval first_three=mvindex(myfield, 0, 2)
59
+ | eval combined=mvzip(names, scores, ":")
60
+
61
+ | eval field_len=len(message)
62
+ | eval lower_host=lower(host)
63
+ | eval upper_method=upper(method)
64
+ | eval trimmed=trim(raw, " \t")
65
+ | eval part=substr(uri_path, 1, 10)
66
+
67
+ | eval x=abs(-42)
68
+ | eval area=pi() * pow(radius, 2)
69
+ | eval log_val=log(bytes, 10)
70
+ | eval root=sqrt(variance)
71
+
72
+ | eval angle_rad=acos(0.5)
73
+ | eval sine=sin(3.14159)
74
+ | eval tangent=atan2(y, x)
75
+
76
+ | eval is_number=isnum(field1)
77
+ | eval not_empty=isnotnull(field2)
78
+ | eval field_type=typeof(myfield)
79
+
80
+ | eval obj=json_object("name", host, "status", status)
81
+ | eval val=json_extract(payload, "user.name")
82
+ | eval keys=json_keys(my_json)
83
+ | eval is_valid=json_valid(raw_json)
84
+
85
+ | eval int_val=tonumber("42")
86
+ | eval str_val=tostring(status)
87
+ | eval bool_val=tobool("true")
88
+
89
+ | eval t=time()
90
+ | eval search_start=now()
91
+ | eval adjusted=relative_time(now(), "-1d@d")
92
+
93
+ | eval safe_val=coalesce(field1, field2, "default")
94
+ | eval result=nullif(field1, field2)
95
+ | eval val=null()
96
+
97
+ | eval rand_num=random()
98
+
99
+ index=main sourcetype=access_combined
100
+ | stats count AS total_requests,
101
+ avg(response_time) AS avg_response,
102
+ max(response_time) AS max_response,
103
+ min(response_time) AS min_response,
104
+ dc(clientip) AS unique_clients,
105
+ sum(bytes) AS total_bytes,
106
+ stdev(response_time) AS std_dev,
107
+ median(response_time) AS median_response,
108
+ range(response_time) AS response_range,
109
+ mode(status) AS common_status,
110
+ var(response_time) AS variance,
111
+ first(clientip) AS first_client,
112
+ last(clientip) AS last_client,
113
+ list(uri_path) AS all_paths,
114
+ values(method) AS methods,
115
+ earliest(_time) AS first_seen,
116
+ latest(_time) AS last_seen,
117
+ perc95(response_time) AS p95
118
+ BY host, sourcetype
119
+
120
+ index=main
121
+ | timechart span=1h count BY status
122
+ | timechart per_hour(bytes) BY host
123
+
124
+ index=main
125
+ | chart count OVER status BY host
126
+
127
+ index=main sourcetype=access_combined
128
+ | eventstats avg(response_time) AS global_avg BY host
129
+
130
+ index=main sourcetype=access_combined
131
+ | streamstats window=10 avg(response_time) AS rolling_avg
132
+
133
+ | rex field=_raw "user=(?<username>\w+)"
134
+ | rex field=uri_path "^/api/(?<api_version>v\d+)/(?<endpoint>\w+)"
135
+ | regex _raw="error|fail|exception"
136
+
137
+ index=main
138
+ | append [search index=summary sourcetype=report]
139
+ | join type=outer host [search index=assets | fields host, location, owner]
140
+
141
+ index=main
142
+ | eval category=case(status>=500, "error", status>=200, "ok")
143
+ | stats count BY category
144
+ | append [
145
+ search index=summary report_type=baseline
146
+ | stats avg(count) AS baseline BY category
147
+ ]
148
+
149
+ `my_saved_macro`
150
+ `generate_report(host, "2024-01-01", "2024-12-31")`
151
+
152
+ | search index=main `my_index_macro`
153
+
154
+ index=main sourcetype=access_combined
155
+ | fields host, source, sourcetype, _time, _raw, status
156
+ | table host source sourcetype status
157
+
158
+ | rename old_field AS new_field
159
+ | rename "Long Field Name" AS short_name, count AS total
160
+
161
+ | sort -count, +host
162
+ | sort 0 -num(bytes)
163
+
164
+ | dedup host, sourcetype
165
+
166
+ | head 100
167
+ | tail 50
168
+ | reverse
169
+
170
+ | fillnull value=0 count response_time
171
+ | filldown hostname
172
+
173
+ | where isnotnull(error_message)
174
+ | where like(uri_path, "/api/%")
175
+ | where status IN (200, 301, 404, 500)
176
+
177
+ | convert timeformat="%Y-%m-%d" ctime(_time) AS event_date
178
+ | convert rmunit(percent_cpu)
179
+
180
+ status!=200 AND (method="POST" OR method="PUT") NOT url="*/health*"
181
+
182
+ | eval rate=round(bytes/1024/1024, 2)
183
+ | eval ratio=if(total>0, success/total*100, 0)
184
+ | eval flag=if(count>threshold AND severity="high", 1, 0)
185
+
186
+ | eval msg="Hello \"world\""
187
+
188
+ sourcetype="access_*" index=web_logs
189
+ host='prod-web-*'
190
+
191
+ | eval x=10 + 20 - 5 * 3 / 2 % 4
192
+
193
+ true false TRUE FALSE null NULL
194
+
195
+ _time _raw host source sourcetype index
196
+
197
+ | transaction host maxspan=5m maxpause=30s
198
+ | iplocation clientip
199
+ | geostats latfield=lat longfield=lon count BY status
200
+
201
+ | predict response_time AS predicted_response future_timespan=24
202
+ | trendline sma5(response_time) AS trend
203
+ | anomalydetection field=response_time
204
+
205
+ | makeresults count=10
206
+ | eval random_val=random() % 100
207
+ | outputlookup my_lookup.csv
208
+
209
+ | inputlookup my_reference.csv
210
+ | lookup geo_lookup ip AS src_ip OUTPUT city, country
211
+
212
+ index=main
213
+ | stats count BY host
214
+ | where count > [search index=summary | stats avg(count) AS threshold | return $threshold]
215
+
216
+ | map search="search index=main host=$host$ | stats count"
217
+
218
+ | multikv forceheader=1
219
+ | makemv delim="," values
220
+ | mvexpand values
221
+
222
+ | foreach * [eval <<FIELD>>=if(isnull(<<FIELD>>), 0, <<FIELD>>)]
223
+ | eval result=printf("%s has %d errors (%.2f%%)", host, count, pct)
224
+ | strcat source ":" sourcetype full_source
225
+
226
+ | spath input=json_data path=results{}
227
+
228
+ earliest=-24h@h latest=now
229
+ earliest=-7d@d latest=@d
230
+ earliest=-30d@d latest=now
231
+ earliest=1704067200 latest=1706745600
232
+
233
+ 42 3.14159 -100 1e6 0xFF 0b1010 1.5e-3
234
+
235
+ | eval x=2+3
236
+ | eval y=10-4
237
+ | eval z=6*7
238
+ | eval w=100/3
239
+ | eval r=17%5
240
+ | eval s="hello" . " " . "world"
241
+ status>200 count<=1000 field!=value
242
+ flag=true OR flag=false
243
+ data AND NOT empty
244
+ host XOR backup
245
+ field IN ("a", "b", "c")
246
+ name LIKE "web%"
247
+ | stats count BY host AS hostname OVER status
248
+ | stats count OUTPUT totalcount OUTPUTNEW newcount
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rouge-lexer-spl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Sean Whalen
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2026-03-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rouge
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '3.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '3.0'
27
+ description: A Rouge plugin providing syntax highlighting for Splunk Search Processing
28
+ Language (SPL)
29
+ email:
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - lib/rouge/lexer/spl.rb
35
+ - lib/rouge/lexers/spl.rb
36
+ - spec/demos/spl
37
+ - spec/visual/samples/spl
38
+ homepage: https://github.com/seanthegeek/rouge-lexer-spl
39
+ licenses:
40
+ - MIT
41
+ metadata:
42
+ source_code_uri: https://github.com/seanthegeek/rouge-lexer-spl
43
+ bug_tracker_uri: https://github.com/seanthegeek/rouge-lexer-spl/issues
44
+ post_install_message:
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '3.0'
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubygems_version: 3.4.20
60
+ signing_key:
61
+ specification_version: 4
62
+ summary: Rouge lexer for Splunk SPL
63
+ test_files: []