RubyGems - rouge-lexer-spl - Versions diffs - 0.1.0 - Mend

rouge-lexer-spl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 95bcba36175336b5f4bdfb541516536df46990fb43bf2530690815c758ee96bf
+  data.tar.gz: 55818398ba0266efba91869c7d5f508724421d5165c90a0cce98c5e285b812e2
+SHA512:
+  metadata.gz: ef14e53f0f449fa595f51a3227c2fdef71fbd08875f056b57827a01da3f2f4ecd7b50182304fddd948ce8d19376a915e707657399f78f1f43b58bc319e8e4c45
+  data.tar.gz: cf377703f943a0dc81a2c4802772f82aaa382e9830db8deae252dc16b70fb93e770f954e4f6070013a5be3ff05997caab445fa73f12251d0b351cb455ae14cb5

data/lib/rouge/lexer/spl.rb ADDED Viewed

@@ -0,0 +1,4 @@
+# frozen_string_literal: true
+require 'rouge'
+require File.expand_path('../lexers/spl', __dir__)

data/lib/rouge/lexers/spl.rb ADDED Viewed

@@ -0,0 +1,200 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+module Rouge
+  module Lexers
+    class SPL < RegexLexer
+      title "SPL"
+      desc "Splunk Search Processing Language (SPL)"
+      tag 'spl'
+      aliases 'splunk', 'splunk-spl'
+      filenames '*.spl', '*.splunk'
+      mimetypes 'text/x-spl'
+      def self.detect?(text)
+        return true if text =~ /^\s*\|\s*(stats|eval|table|search|where|rex|rename|fields|sort|dedup|timechart|chart|head|tail)\b/i
+        return true if text =~ /\bindex\s*=\s*\w+/i && text =~ /\bsourcetype\s*=\s*/i
+      end
+      # SPL commands (from Splunk command quick reference)
+      def self.commands
+        @commands ||= Set.new %w(
+          abstract accum addcoltotals addinfo addtotals analyzefields
+          anomalies anomalousvalue anomalydetection append appendcols
+          appendpipe arules associate autoregress bin bucket bucketdir
+          chart cluster cofilter collect concurrency contingency convert
+          correlate datamodel dbinspect dedup delete delta diff erex eval
+          eventcount eventstats extract filldown fillnull findtypes
+          folderize foreach format from gauge gentimes geom geomfilter
+          geostats head highlight history iconify inputcsv inputlookup
+          iplocation join kmeans kvform loadjob localize localop lookup
+          makecontinuous makemv makeresults map mcollect metadata
+          metasearch meventcollect mpreview msearch mstats multikv
+          multisearch mvcombine mvexpand nomv outlier outputcsv
+          outputlookup outputtext overlap pivot predict rangemap rare
+          redistribute regex reltime rename replace require rest return
+          reverse rex rtorder savedsearch script run scrub search
+          searchtxn selfjoin sendalert sendemail set setfields sichart
+          sirare sistats sitimechart sitop sort spath stats strcat
+          streamstats table tags tail timechart timewrap tojson top
+          transaction transpose trendline tscollect tstats typeahead
+          typelearner typer union uniq untable walklex where x11 xmlkv
+          xmlunescape xpath xyseries kv
+        )
+      end
+      # Evaluation functions (from Splunk evaluation functions reference)
+      def self.eval_functions
+        @eval_functions ||= Set.new %w(
+          abs acos acosh asin asinh atan atan2 atanh avg
+          bit_and bit_or bit_not bit_xor bit_shift_left bit_shift_right
+          case cidrmatch ceiling coalesce commands cos cosh exact exp
+          false floor hypot if in ipmask isarray isbool isdouble isint
+          ismv isnotnull isnull isnum isobject isstr json json_append
+          json_array json_array_to_mv json_delete json_entries
+          json_extend json_extract json_extract_exact json_has_key_exact
+          json_keys json_object json_set json_set_exact json_valid len
+          like ln log lower ltrim match max md5 min mvappend mvcount
+          mvdedup mvfilter mvfind mvindex mvjoin mvmap mvrange mvsort
+          mvzip mv_to_json_array now null nullif pi pow printf random
+          relative_time replace round rtrim searchmatch sha1 sha256
+          sha512 sigfig sin sinh split sqrt strftime strptime substr sum
+          tan tanh time toarray tobool todouble toint tomv tonumber
+          toobject tostring trim true typeof upper urldecode validate
+        )
+      end
+      # Statistical and charting functions (from Splunk stats functions reference)
+      def self.stats_functions
+        @stats_functions ||= Set.new %w(
+          avg count distinct_count dc estdc estdc_error exactperc max
+          mean median min mode perc percentile range stdev stdevp sum
+          sumsq upperperc var varp first last list values earliest
+          earliest_time latest latest_time per_day per_hour per_minute
+          per_second rate rate_avg rate_sum sparkline
+        )
+      end
+      # Operator keywords
+      def self.operator_words
+        @operator_words ||= Set.new %w(
+          AND OR NOT XOR IN LIKE BY AS OVER OUTPUT OUTPUTNEW WHERE
+        )
+      end
+      # Constants
+      def self.constants
+        @constants ||= Set.new %w(
+          true false TRUE FALSE null NULL
+        )
+      end
+      # Built-in / internal fields
+      def self.builtin_fields
+        @builtin_fields ||= Set.new %w(
+          _time _raw _indextime _cd _serial _bkt _si _sourcetype
+          _subsecond _kv host source sourcetype index splunk_server
+          linecount punct timeendpos timestartpos eventtype tag
+          date_hour date_mday date_minute date_month date_second
+          date_wday date_year date_zone
+        )
+      end
+      state :root do
+        # Whitespace
+        rule %r/\s+/m, Text
+        # Block comments (triple backtick)
+        rule %r/```/, Comment::Multiline, :block_comment
+        # Single-line comments (starting with ` followed by content)
+        # SPL doesn't have single-line comments in the traditional sense
+        # Double-quoted strings
+        rule %r/"/, Str::Double, :double_string
+        # Single-quoted strings (field names)
+        rule %r/'/, Str::Single, :single_string
+        # Backtick-quoted macros/saved searches (not triple)
+        rule %r/`(?!``)/, Name::Function, :backtick_string
+        # Numeric literals
+        rule %r/-?\d+\.\d+(?:e[+-]?\d+)?/i, Num::Float
+        rule %r/-?\d+(?:e[+-]?\d+)?/i, Num::Integer
+        # Time modifiers (e.g., -24h@h, +7d@d, -30m, now)
+        rule %r/[+-]\d+[smhdwqy](?:@[smhdwqy])?/i, Literal::Date
+        # Subsearch brackets
+        rule %r/[\[\]]/, Punctuation
+        # Pipe operator
+        rule %r/\|/, Punctuation
+        # Comparison and assignment operators
+        rule %r/[<>!=]=?/, Operator
+        rule %r/==/, Operator
+        # Arithmetic and string concatenation operators
+        rule %r/[+\-*\/%]/, Operator
+        rule %r/\.\./, Operator
+        rule %r/\.(?!\w)/, Operator
+        # Other punctuation
+        rule %r/[(),;]/, Punctuation
+        # Equals sign (assignment / field=value)
+        rule %r/=/, Operator
+        # Wildcard
+        rule %r/\*/, Operator
+        # Words — classify by set membership
+        rule %r/\w+/ do |m|
+          word = m[0]
+          word_upper = word.upcase
+          word_lower = word.downcase
+          if self.class.constants.include? word
+            token Keyword::Constant
+          elsif self.class.operator_words.include? word_upper
+            token Keyword::Pseudo
+          elsif self.class.commands.include? word_lower
+            token Keyword
+          elsif self.class.eval_functions.include? word_lower
+            token Name::Builtin
+          elsif self.class.stats_functions.include? word_lower
+            token Name::Builtin
+          elsif self.class.builtin_fields.include? word_lower
+            token Name::Variable::Magic
+          else
+            token Name
+          end
+        end
+      end
+      state :block_comment do
+        rule %r/```/, Comment::Multiline, :pop!
+        rule %r/[^`]+/, Comment::Multiline
+        rule %r/`/, Comment::Multiline
+      end
+      state :double_string do
+        rule %r/\\./, Str::Escape
+        rule %r/"/, Str::Double, :pop!
+        rule %r/[^\\"]+/, Str::Double
+      end
+      state :single_string do
+        rule %r/\\./, Str::Escape
+        rule %r/'/, Str::Single, :pop!
+        rule %r/[^\\']+/, Str::Single
+      end
+      state :backtick_string do
+        rule %r/`/, Name::Function, :pop!
+        rule %r/[^`]+/, Name::Function
+      end
+    end
+  end
+end

data/spec/demos/spl ADDED Viewed

@@ -0,0 +1,12 @@
+``` This is a block comment in SPL ```
+index=main sourcetype=access_combined earliest=-24h@h latest=now
+| search status>=400 NOT status=200
+| eval error_type=case(status>=500, "Server Error", status>=400, "Client Error", true(), "Other")
+| stats count AS error_count, values(uri_path) AS paths BY error_type, host
+| where error_count > 10
+| eval message=if(error_count>100, "CRITICAL", "WARNING")
+| eval hash=md5(host)
+| rename error_count AS "Total Errors"
+| sort -error_count
+| table error_type, host, "Total Errors", paths, message
+| head 20

data/spec/visual/samples/spl ADDED Viewed

@@ -0,0 +1,248 @@
+```
+This is a block comment in SPL.
+It can span multiple lines.
+Use triple backticks to start and end.
+```
+``` Short block comment ```
+index=main sourcetype=syslog host=webserver01
+index=firewall sourcetype=cisco:asa action=blocked src_ip=10.0.0.*
+sourcetype=access_combined status>=400
+error OR fail OR critical
+status=200 AND method=GET
+source="/var/log/messages" NOT debug
+index=main sourcetype=access_combined earliest=-24h@h latest=now
+| search status>=400
+| stats count BY status, host
+index=main earliest=-7d@d latest=now
+| timechart span=1h count BY sourcetype
+index=main earliest=-30m latest=now
+| eval duration=round(response_time, 2)
+| eval severity=case(
+    status>=500, "critical",
+    status>=400, "error",
+    status>=300, "redirect",
+    status>=200, "success",
+    true(), "unknown"
+  )
+| eval is_local=if(cidrmatch("10.0.0.0/8", src_ip), "yes", "no")
+| eval combined=mvjoin(mvappend(field1, field2), ",")
+| eval extracted=replace(uri_path, "^/api/v\d+/", "")
+| eval request_time=strftime(_time, "%Y-%m-%d %H:%M:%S")
+| eval epoch=strptime("2024-01-15 08:30:00", "%Y-%m-%d %H:%M:%S")
+| eval checksum=md5(raw_data)
+| eval hash=sha256(username)
+| eval avg_val=avg(field1, field2, field3)
+| eval max_val=max(bytes_in, bytes_out)
+| eval min_val=min(response_time)
+| eval item_count=mvcount(values)
+| eval unique_vals=mvdedup(myfield)
+| eval filtered=mvfilter(match(mvfield, "error"))
+| eval first_three=mvindex(myfield, 0, 2)
+| eval combined=mvzip(names, scores, ":")
+| eval field_len=len(message)
+| eval lower_host=lower(host)
+| eval upper_method=upper(method)
+| eval trimmed=trim(raw, " \t")
+| eval part=substr(uri_path, 1, 10)
+| eval x=abs(-42)
+| eval area=pi() * pow(radius, 2)
+| eval log_val=log(bytes, 10)
+| eval root=sqrt(variance)
+| eval angle_rad=acos(0.5)
+| eval sine=sin(3.14159)
+| eval tangent=atan2(y, x)
+| eval is_number=isnum(field1)
+| eval not_empty=isnotnull(field2)
+| eval field_type=typeof(myfield)
+| eval obj=json_object("name", host, "status", status)
+| eval val=json_extract(payload, "user.name")
+| eval keys=json_keys(my_json)
+| eval is_valid=json_valid(raw_json)
+| eval int_val=tonumber("42")
+| eval str_val=tostring(status)
+| eval bool_val=tobool("true")
+| eval t=time()
+| eval search_start=now()
+| eval adjusted=relative_time(now(), "-1d@d")
+| eval safe_val=coalesce(field1, field2, "default")
+| eval result=nullif(field1, field2)
+| eval val=null()
+| eval rand_num=random()
+index=main sourcetype=access_combined
+| stats count AS total_requests,
+    avg(response_time) AS avg_response,
+    max(response_time) AS max_response,
+    min(response_time) AS min_response,
+    dc(clientip) AS unique_clients,
+    sum(bytes) AS total_bytes,
+    stdev(response_time) AS std_dev,
+    median(response_time) AS median_response,
+    range(response_time) AS response_range,
+    mode(status) AS common_status,
+    var(response_time) AS variance,
+    first(clientip) AS first_client,
+    last(clientip) AS last_client,
+    list(uri_path) AS all_paths,
+    values(method) AS methods,
+    earliest(_time) AS first_seen,
+    latest(_time) AS last_seen,
+    perc95(response_time) AS p95
+  BY host, sourcetype
+index=main
+| timechart span=1h count BY status
+| timechart per_hour(bytes) BY host
+index=main
+| chart count OVER status BY host
+index=main sourcetype=access_combined
+| eventstats avg(response_time) AS global_avg BY host
+index=main sourcetype=access_combined
+| streamstats window=10 avg(response_time) AS rolling_avg
+| rex field=_raw "user=(?<username>\w+)"
+| rex field=uri_path "^/api/(?<api_version>v\d+)/(?<endpoint>\w+)"
+| regex _raw="error|fail|exception"
+index=main
+| append [search index=summary sourcetype=report]
+| join type=outer host [search index=assets | fields host, location, owner]
+index=main
+| eval category=case(status>=500, "error", status>=200, "ok")
+| stats count BY category
+| append [
+    search index=summary report_type=baseline
+    | stats avg(count) AS baseline BY category
+  ]
+`my_saved_macro`
+`generate_report(host, "2024-01-01", "2024-12-31")`
+| search index=main `my_index_macro`
+index=main sourcetype=access_combined
+| fields host, source, sourcetype, _time, _raw, status
+| table host source sourcetype status
+| rename old_field AS new_field
+| rename "Long Field Name" AS short_name, count AS total
+| sort -count, +host
+| sort 0 -num(bytes)
+| dedup host, sourcetype
+| head 100
+| tail 50
+| reverse
+| fillnull value=0 count response_time
+| filldown hostname
+| where isnotnull(error_message)
+| where like(uri_path, "/api/%")
+| where status IN (200, 301, 404, 500)
+| convert timeformat="%Y-%m-%d" ctime(_time) AS event_date
+| convert rmunit(percent_cpu)
+status!=200 AND (method="POST" OR method="PUT") NOT url="*/health*"
+| eval rate=round(bytes/1024/1024, 2)
+| eval ratio=if(total>0, success/total*100, 0)
+| eval flag=if(count>threshold AND severity="high", 1, 0)
+| eval msg="Hello \"world\""
+sourcetype="access_*" index=web_logs
+host='prod-web-*'
+| eval x=10 + 20 - 5 * 3 / 2 % 4
+true false TRUE FALSE null NULL
+_time _raw host source sourcetype index
+| transaction host maxspan=5m maxpause=30s
+| iplocation clientip
+| geostats latfield=lat longfield=lon count BY status
+| predict response_time AS predicted_response future_timespan=24
+| trendline sma5(response_time) AS trend
+| anomalydetection field=response_time
+| makeresults count=10
+| eval random_val=random() % 100
+| outputlookup my_lookup.csv
+| inputlookup my_reference.csv
+| lookup geo_lookup ip AS src_ip OUTPUT city, country
+index=main
+| stats count BY host
+| where count > [search index=summary | stats avg(count) AS threshold | return $threshold]
+| map search="search index=main host=$host$ | stats count"
+| multikv forceheader=1
+| makemv delim="," values
+| mvexpand values
+| foreach * [eval <<FIELD>>=if(isnull(<<FIELD>>), 0, <<FIELD>>)]
+| eval result=printf("%s has %d errors (%.2f%%)", host, count, pct)
+| strcat source ":" sourcetype full_source
+| spath input=json_data path=results{}
+earliest=-24h@h latest=now
+earliest=-7d@d latest=@d
+earliest=-30d@d latest=now
+earliest=1704067200 latest=1706745600
+42 3.14159 -100 1e6 0xFF 0b1010 1.5e-3
+| eval x=2+3
+| eval y=10-4
+| eval z=6*7
+| eval w=100/3
+| eval r=17%5
+| eval s="hello" . " " . "world"
+status>200 count<=1000 field!=value
+flag=true OR flag=false
+data AND NOT empty
+host XOR backup
+field IN ("a", "b", "c")
+name LIKE "web%"
+| stats count BY host AS hostname OVER status
+| stats count OUTPUT totalcount OUTPUTNEW newcount

metadata ADDED Viewed

@@ -0,0 +1,63 @@
+--- !ruby/object:Gem::Specification
+name: rouge-lexer-spl
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Sean Whalen
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2026-03-06 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rouge
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '3.0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '3.0'
+description: A Rouge plugin providing syntax highlighting for Splunk Search Processing
+  Language (SPL)
+email:
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/rouge/lexer/spl.rb
+- lib/rouge/lexers/spl.rb
+- spec/demos/spl
+- spec/visual/samples/spl
+homepage: https://github.com/seanthegeek/rouge-lexer-spl
+licenses:
+- MIT
+metadata:
+  source_code_uri: https://github.com/seanthegeek/rouge-lexer-spl
+  bug_tracker_uri: https://github.com/seanthegeek/rouge-lexer-spl/issues
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '3.0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubygems_version: 3.4.20
+signing_key:
+specification_version: 4
+summary: Rouge lexer for Splunk SPL
+test_files: []