rouge 4.6.1 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +11 -4
  3. data/bin/rougify +3 -4
  4. data/lib/rouge/cli.rb +7 -10
  5. data/lib/rouge/demos/abap +30 -4
  6. data/lib/rouge/demos/dylan +8 -0
  7. data/lib/rouge/demos/gjs +23 -0
  8. data/lib/rouge/demos/gts +36 -0
  9. data/lib/rouge/demos/kick_assembler +14 -0
  10. data/lib/rouge/demos/pdf +29 -0
  11. data/lib/rouge/demos/thrift +15 -0
  12. data/lib/rouge/demos/veryl +24 -0
  13. data/lib/rouge/eager.rb +3 -0
  14. data/lib/rouge/formatters/html.rb +8 -1
  15. data/lib/rouge/formatters/html_debug.rb +16 -0
  16. data/lib/rouge/formatters/html_legacy.rb +15 -1
  17. data/lib/rouge/formatters/html_legacy_table.rb +57 -0
  18. data/lib/rouge/formatters/html_line_highlighter.rb +11 -4
  19. data/lib/rouge/formatters/html_line_table.rb +8 -3
  20. data/lib/rouge/formatters/html_linewise.rb +11 -2
  21. data/lib/rouge/formatters/html_pygments.rb +6 -1
  22. data/lib/rouge/formatters/html_table.rb +47 -21
  23. data/lib/rouge/formatters/terminal256.rb +3 -3
  24. data/lib/rouge/formatters/tex.rb +1 -1
  25. data/lib/rouge/guesser.rb +1 -1
  26. data/lib/rouge/guessers/disambiguation.rb +2 -2
  27. data/lib/rouge/guessers/glob_mapping.rb +2 -2
  28. data/lib/rouge/guessers/modeline.rb +2 -2
  29. data/lib/rouge/lexer.rb +40 -21
  30. data/lib/rouge/lexers/abap/builtins.rb +187 -0
  31. data/lib/rouge/lexers/abap.rb +70 -178
  32. data/lib/rouge/lexers/ada.rb +1 -1
  33. data/lib/rouge/lexers/apache/keywords.rb +3 -11
  34. data/lib/rouge/lexers/apache.rb +25 -24
  35. data/lib/rouge/lexers/apex.rb +2 -2
  36. data/lib/rouge/lexers/apiblueprint.rb +2 -2
  37. data/lib/rouge/lexers/bicep.rb +2 -2
  38. data/lib/rouge/lexers/biml.rb +36 -9
  39. data/lib/rouge/lexers/brightscript.rb +3 -4
  40. data/lib/rouge/lexers/c.rb +9 -5
  41. data/lib/rouge/lexers/cobol.rb +1 -1
  42. data/lib/rouge/lexers/console.rb +11 -5
  43. data/lib/rouge/lexers/cpp.rb +10 -12
  44. data/lib/rouge/lexers/crystal.rb +1 -1
  45. data/lib/rouge/lexers/css.rb +21 -2
  46. data/lib/rouge/lexers/cuda.rb +4 -4
  47. data/lib/rouge/lexers/cython.rb +26 -50
  48. data/lib/rouge/lexers/dafny.rb +1 -1
  49. data/lib/rouge/lexers/dart.rb +2 -0
  50. data/lib/rouge/lexers/datastudio.rb +1 -1
  51. data/lib/rouge/lexers/digdag.rb +2 -3
  52. data/lib/rouge/lexers/dylan.rb +109 -0
  53. data/lib/rouge/lexers/ecl.rb +3 -3
  54. data/lib/rouge/lexers/elixir.rb +14 -9
  55. data/lib/rouge/lexers/escape.rb +1 -1
  56. data/lib/rouge/lexers/factor.rb +1 -1
  57. data/lib/rouge/lexers/freefem.rb +2 -2
  58. data/lib/rouge/lexers/ghc_cmm.rb +1 -1
  59. data/lib/rouge/lexers/ghc_core.rb +1 -1
  60. data/lib/rouge/lexers/gherkin/keywords.rb +10 -6
  61. data/lib/rouge/lexers/gherkin.rb +28 -26
  62. data/lib/rouge/lexers/gjs.rb +39 -0
  63. data/lib/rouge/lexers/glsl/builtins.rb +17 -0
  64. data/lib/rouge/lexers/glsl.rb +50 -113
  65. data/lib/rouge/lexers/go.rb +8 -6
  66. data/lib/rouge/lexers/gradle.rb +2 -2
  67. data/lib/rouge/lexers/groovy.rb +6 -0
  68. data/lib/rouge/lexers/gts.rb +39 -0
  69. data/lib/rouge/lexers/hack.rb +4 -6
  70. data/lib/rouge/lexers/haxe.rb +2 -2
  71. data/lib/rouge/lexers/hlsl.rb +6 -6
  72. data/lib/rouge/lexers/hocon.rb +2 -2
  73. data/lib/rouge/lexers/hql.rb +15 -19
  74. data/lib/rouge/lexers/idris.rb +1 -1
  75. data/lib/rouge/lexers/igorpro/builtins.rb +1530 -0
  76. data/lib/rouge/lexers/igorpro.rb +49 -520
  77. data/lib/rouge/lexers/irb.rb +3 -3
  78. data/lib/rouge/lexers/isbl.rb +7 -43
  79. data/lib/rouge/lexers/j.rb +1 -1
  80. data/lib/rouge/lexers/json5.rb +2 -2
  81. data/lib/rouge/lexers/json_doc.rb +2 -2
  82. data/lib/rouge/lexers/jsp.rb +119 -119
  83. data/lib/rouge/lexers/jsx.rb +2 -2
  84. data/lib/rouge/lexers/kick_assembler.rb +100 -0
  85. data/lib/rouge/lexers/kotlin.rb +5 -0
  86. data/lib/rouge/lexers/lasso/keywords.rb +6 -6
  87. data/lib/rouge/lexers/lasso.rb +9 -11
  88. data/lib/rouge/lexers/liquid.rb +5 -6
  89. data/lib/rouge/lexers/llvm/keywords.rb +3 -12
  90. data/lib/rouge/lexers/llvm.rb +5 -16
  91. data/lib/rouge/lexers/lua/keywords.rb +11 -13
  92. data/lib/rouge/lexers/lua.rb +8 -7
  93. data/lib/rouge/lexers/lutin.rb +5 -4
  94. data/lib/rouge/lexers/m68k.rb +3 -3
  95. data/lib/rouge/lexers/mason.rb +110 -110
  96. data/lib/rouge/lexers/mathematica/keywords.rb +1 -3
  97. data/lib/rouge/lexers/mathematica.rb +6 -7
  98. data/lib/rouge/lexers/matlab/keywords.rb +2 -2
  99. data/lib/rouge/lexers/matlab.rb +5 -5
  100. data/lib/rouge/lexers/meson.rb +1 -1
  101. data/lib/rouge/lexers/mojo.rb +6 -2
  102. data/lib/rouge/lexers/moonscript.rb +19 -17
  103. data/lib/rouge/lexers/nesasm.rb +6 -6
  104. data/lib/rouge/lexers/nial.rb +7 -8
  105. data/lib/rouge/lexers/nim.rb +3 -3
  106. data/lib/rouge/lexers/nix.rb +10 -8
  107. data/lib/rouge/lexers/objective_c.rb +3 -3
  108. data/lib/rouge/lexers/objective_cpp.rb +3 -2
  109. data/lib/rouge/lexers/ocaml.rb +2 -2
  110. data/lib/rouge/lexers/openedge.rb +9 -9
  111. data/lib/rouge/lexers/pdf.rb +109 -0
  112. data/lib/rouge/lexers/php/keywords.rb +136 -188
  113. data/lib/rouge/lexers/php.rb +100 -11
  114. data/lib/rouge/lexers/plsql.rb +8 -9
  115. data/lib/rouge/lexers/postscript.rb +1 -1
  116. data/lib/rouge/lexers/powershell.rb +1 -1
  117. data/lib/rouge/lexers/python.rb +111 -66
  118. data/lib/rouge/lexers/qml.rb +2 -2
  119. data/lib/rouge/lexers/reasonml.rb +2 -2
  120. data/lib/rouge/lexers/rego.rb +53 -53
  121. data/lib/rouge/lexers/rescript.rb +2 -2
  122. data/lib/rouge/lexers/robot_framework.rb +12 -12
  123. data/lib/rouge/lexers/{coq.rb → rocq.rb} +15 -6
  124. data/lib/rouge/lexers/ruby.rb +33 -7
  125. data/lib/rouge/lexers/rust.rb +1 -0
  126. data/lib/rouge/lexers/sas.rb +164 -165
  127. data/lib/rouge/lexers/sass/common.rb +1 -1
  128. data/lib/rouge/lexers/sass.rb +2 -2
  129. data/lib/rouge/lexers/scss.rb +2 -2
  130. data/lib/rouge/lexers/sed.rb +1 -1
  131. data/lib/rouge/lexers/shell.rb +5 -5
  132. data/lib/rouge/lexers/slice.rb +2 -2
  133. data/lib/rouge/lexers/sparql.rb +1 -1
  134. data/lib/rouge/lexers/sqf/keywords.rb +5 -3
  135. data/lib/rouge/lexers/sqf.rb +3 -4
  136. data/lib/rouge/lexers/stan.rb +9 -15
  137. data/lib/rouge/lexers/svelte.rb +2 -2
  138. data/lib/rouge/lexers/swift.rb +2 -2
  139. data/lib/rouge/lexers/tcl.rb +1 -1
  140. data/lib/rouge/lexers/terraform.rb +3 -3
  141. data/lib/rouge/lexers/thrift.rb +120 -0
  142. data/lib/rouge/lexers/tsx.rb +3 -3
  143. data/lib/rouge/lexers/ttcn3.rb +1 -1
  144. data/lib/rouge/lexers/twig.rb +2 -2
  145. data/lib/rouge/lexers/typescript.rb +3 -3
  146. data/lib/rouge/lexers/varnish.rb +8 -8
  147. data/lib/rouge/lexers/veryl.rb +109 -0
  148. data/lib/rouge/lexers/viml/keywords.rb +5 -7
  149. data/lib/rouge/lexers/viml.rb +8 -10
  150. data/lib/rouge/lexers/vue.rb +2 -2
  151. data/lib/rouge/lexers/xquery.rb +2 -1
  152. data/lib/rouge/lexers/yaml.rb +13 -1
  153. data/lib/rouge/lexers/yang.rb +42 -74
  154. data/lib/rouge/lexers/zig.rb +10 -10
  155. data/lib/rouge/plugins/redcarpet.rb +3 -2
  156. data/lib/rouge/regex_lexer.rb +25 -14
  157. data/lib/rouge/themes/gruvbox.rb +3 -0
  158. data/lib/rouge/token.rb +1 -1
  159. data/lib/rouge/util.rb +1 -1
  160. data/lib/rouge/version.rb +1 -1
  161. data/lib/rouge.rb +70 -59
  162. data/lib/rubocop/cop/rouge/no_building_alternation_pattern_in_regexp.rb +71 -0
  163. data/lib/rubocop/cop/rouge/no_huge_collections.rb +37 -0
  164. data/rouge.gemspec +8 -5
  165. metadata +44 -9
  166. data/lib/rouge/lexers/isbl/builtins.rb +0 -17
  167. data/lib/rouge/lexers/matlab/builtins.rb +0 -11
  168. /data/lib/rouge/demos/{coq → rocq} +0 -0
@@ -64,19 +64,18 @@ module Rouge
64
64
  @builtins ||= Set.new %w(
65
65
  roAppendFile roAppInfo roAppManager roArray roAssociativeArray
66
66
  roAudioGuide roAudioMetadata roAudioPlayer roAudioPlayerEvent
67
- roAudioResourceroBitmap roBoolean roBoolean roBrightPackage roBrSub
67
+ roAudioResourceroBitmap roBoolean roBrightPackage roBrSub
68
68
  roButton roByteArray roCaptionRenderer roCaptionRendererEvent
69
69
  roCecInterface roCECStatusEvent roChannelStore roChannelStoreEvent
70
70
  roClockWidget roCodeRegistrationScreen
71
- roCodeRegistrationScreenEventroCompositor roControlDown roControlPort
72
- roControlPort roControlUp roCreateFile roDatagramReceiver
71
+ roCodeRegistrationScreenEventroCompositor roControlDown roControlPort roControlUp roCreateFile roDatagramReceiver
73
72
  roDatagramSender roDataGramSocket roDateTime roDeviceInfo
74
73
  roDeviceInfoEvent roDoubleroEVPCipher roEVPDigest roFileSystem
75
74
  roFileSystemEvent roFloat roFont roFontMetrics roFontRegistry
76
75
  roFunction roGlobal roGpio roGridScreen roGridScreenEvent
77
76
  roHdmiHotPlugEventroHdmiStatus roHdmiStatusEvent roHMAC roHttpAgent
78
77
  roImageCanvas roImageCanvasEvent roImageMetadata roImagePlayer
79
- roImageWidgetroInput roInputEvent roInt roInt roInvalid roInvalid
78
+ roImageWidgetroInput roInputEvent roInt roInvalid
80
79
  roIRRemote roKeyboard roKeyboardPress roKeyboardScreen
81
80
  roKeyboardScreenEventroList roListScreen roListScreenEvent
82
81
  roLocalization roLongInteger roMessageDialog roMessageDialogEvent
@@ -17,9 +17,10 @@ module Rouge
17
17
 
18
18
  def self.keywords
19
19
  @keywords ||= Set.new %w(
20
- auto break case const continue default do else enum extern
21
- for goto if register restricted return sizeof static struct
22
- switch typedef union volatile virtual while
20
+ alignas alignof auto break case const constexpr continue
21
+ default do else enum extern for goto if register return
22
+ sizeof static static_assert struct switch typedef typeof
23
+ typeof_unqual union volatile while
23
24
 
24
25
  _Alignas _Alignof _Atomic _Generic _Imaginary
25
26
  _Noreturn _Static_assert _Thread_local
@@ -44,6 +45,8 @@ module Rouge
44
45
  uintmax_t
45
46
 
46
47
  char16_t char32_t
48
+
49
+ _BitInt _Decimal128 _Decimal32 _Decimal64 bool nullptr_t
47
50
  )
48
51
  end
49
52
 
@@ -52,7 +55,8 @@ module Rouge
52
55
  __asm __int8 __based __except __int16 __stdcall __cdecl
53
56
  __fastcall __int32 __declspec __finally __int61 __try __leave
54
57
  inline _inline __inline naked _naked __naked restrict _restrict
55
- __restrict thread _thread __thread typename _typename __typename
58
+ __restrict thread _thread __thread thread_local
59
+ typename _typename __typename
56
60
  )
57
61
  end
58
62
 
@@ -108,7 +112,7 @@ module Rouge
108
112
  rule %r([~!%^&*+=\|?:<>/-]), Operator
109
113
  rule %r/[()\[\],.;]/, Punctuation
110
114
  rule %r/\bcase\b/, Keyword, :case
111
- rule %r/(?:true|false|NULL)\b/, Name::Builtin
115
+ rule %r/(?:true|false|NULL|nullptr)\b/, Name::Builtin
112
116
  rule id do |m|
113
117
  name = m[0]
114
118
 
@@ -7,7 +7,7 @@ module Rouge
7
7
  title 'COBOL'
8
8
  desc 'COBOL (Common Business-Oriented Language) programming language'
9
9
  tag 'cobol'
10
- filenames '*.cob', '*.cbl'
10
+ filenames '*.cob', '*.cbl', '*.cpy', '*.cpb'
11
11
  mimetypes 'text/x-cobol'
12
12
 
13
13
  identifier = /\p{Alpha}[\p{Alnum}-]*/
@@ -75,7 +75,7 @@ module Rouge
75
75
 
76
76
  def end_chars
77
77
  @end_chars ||= if @prompt.any?
78
- @prompt.reject { |c| c.empty? }
78
+ @prompt.reject { |c| c.empty? }.uniq.first(20)
79
79
  elsif allow_comments?
80
80
  %w($ > ;)
81
81
  else
@@ -85,7 +85,9 @@ module Rouge
85
85
 
86
86
  def error_regex
87
87
  @error_regex ||= if @error.any?
88
- /^(?:#{@error.map(&Regexp.method(:escape)).join('|')})/
88
+ #rubocop:disable Rouge/NoBuildingAlternationPatternInRegexp
89
+ /^(?:#{@error.first(20).map { |e| Regexp.escape(e) }.join('|')})/
90
+ #rubocop:enable Rouge/NoBuildingAlternationPatternInRegexp
89
91
  end
90
92
  end
91
93
 
@@ -144,7 +146,7 @@ module Rouge
144
146
  yield Text::Whitespace, $& unless $&.empty?
145
147
 
146
148
  lang_lexer.continue_lex($', &output)
147
- elsif comment_regex =~ input[0].strip
149
+ elsif allow_comments? && comment_regex =~ input[0].strip
148
150
  puts "console: matched comment #{input[0].inspect}" if @debug
149
151
  output_lexer.reset!
150
152
  lang_lexer.reset!
@@ -173,13 +175,17 @@ module Rouge
173
175
  end
174
176
 
175
177
  def prompt_regex
178
+ # [jneen] these characters can come from user input. They are escaped here,
179
+ # and we limit the user to 20.
180
+ #rubocop:disable Rouge/NoBuildingAlternationPatternInRegexp
176
181
  @prompt_regex ||= begin
177
- /^#{prompt_prefix_regex}(?:#{end_chars.map(&Regexp.method(:escape)).join('|')})/
182
+ /^#{prompt_prefix_regex}(?:#{end_chars.map { |c| Regexp.escape(c) }.join('|')})/
178
183
  end
184
+ #rubocop:enable Rouge/NoBuildingAlternationPatternInRegexp
179
185
  end
180
186
 
181
187
  def stream_tokens(input, &output)
182
- input = StringScanner.new(input)
188
+ input = StringScanner.new(input, fixed_anchor: true)
183
189
  lang_lexer.reset!
184
190
  output_lexer.reset!
185
191
 
@@ -1,10 +1,10 @@
1
1
  # -*- coding: utf-8 -*- #
2
2
  # frozen_string_literal: true
3
3
 
4
+ require_relative 'c'
5
+
4
6
  module Rouge
5
7
  module Lexers
6
- load_lexer 'c.rb'
7
-
8
8
  class Cpp < C
9
9
  title "C++"
10
10
  desc "The C++ programming language"
@@ -22,21 +22,19 @@ module Rouge
22
22
 
23
23
  def self.keywords
24
24
  @keywords ||= super + Set.new(%w(
25
- asm auto catch char8_t concept
26
- consteval constexpr constinit const_cast co_await co_return co_yield
27
- delete dynamic_cast explicit export friend
28
- mutable namespace new operator private protected public
29
- reinterpret_cast requires restrict size_of static_cast this throw throws
30
- typeid typename using virtual final override import module
31
-
32
- alignas alignof decltype noexcept static_assert
33
- thread_local try
25
+ and and_eq asm bitand bitor catch compl concept consteval
26
+ constinit const_cast co_await co_return co_yield decltype
27
+ delete dynamic_cast explicit export final friend import
28
+ module mutable namespace new noexcept not not_eq operator or
29
+ or_eq override private protected public reinterpret_cast
30
+ requires size_of static_cast this throw throws try typeid
31
+ typename using virtual xor xor_eq
34
32
  ))
35
33
  end
36
34
 
37
35
  def self.keywords_type
38
36
  @keywords_type ||= super + Set.new(%w(
39
- bool
37
+ char8_t
40
38
  ))
41
39
  end
42
40
 
@@ -80,7 +80,7 @@ module Rouge
80
80
  rule %r/\b[\p{Ll}_]\p{Word}*?[?!]?:\s+/, Str::Symbol, :expr_start
81
81
  rule %r/"/, Str::Double, :simple_string
82
82
  rule %r/(?<!\.)`/, Str::Backtick, :simple_backtick
83
- rule %r/(')(\\u[a-fA-F0-9]{4}|\\u\{[a-fA-F0-9]{1,6}\}|\\[abefnrtv])?(\\\\|\\'|[^'])*(')/ do
83
+ rule %r/(')(\\u[a-fA-F0-9]{4}|\\u\{[a-fA-F0-9]{1,6}\}|\\[abefnrtv])?(\\\\|\\'|[^'\n])*(')/ do
84
84
  groups Str::Single, Str::Escape, Str::Single, Str::Single
85
85
  end
86
86
  end
@@ -13,7 +13,19 @@ module Rouge
13
13
 
14
14
  # Documentation: https://www.w3.org/TR/CSS21/syndata.html#characters
15
15
 
16
- identifier = /[\p{L}_-][\p{Word}\p{Cf}-]*/
16
+ # [jneen] workaround for:
17
+ # https://bugs.ruby-lang.org/issues/21870#change-116371
18
+ #
19
+ # As of ruby 4+, \p{Word} matches ZWJ and ZWNJ, so the additional
20
+ # \p{Cf} is not needed.
21
+ #
22
+ # That being said... this still warns, but at least it's only once?
23
+ identifier = if RUBY_VERSION < '4'
24
+ /[\p{L}_-][\p{Word}\p{Cf}-]*/
25
+ else
26
+ /[\p{L}_-][\p{Word}-]*/
27
+ end
28
+
17
29
  number = /-?(?:[0-9]+(\.[0-9]+)?|\.[0-9]+)/
18
30
 
19
31
  def self.properties
@@ -126,7 +138,7 @@ module Rouge
126
138
  above absolute accumulate add additive all alpha alphabetic
127
139
  alternate alternate-reverse always armenian aural auto auto-fill
128
140
  auto-fit avoid backwards balance baseline behind below bidi-override
129
- blink block bold bolder border-box both bottom bottom break-spaces
141
+ blink block bold bolder border-box both bottom break-spaces
130
142
  capitalize center center-left center-right circle cjk-ideographic
131
143
  close-quote closest-corner closest-side collapse
132
144
  color color-burn color-dodge column column-reverse
@@ -192,6 +204,7 @@ module Rouge
192
204
  seagreen seashell sienna silver skyblue slateblue slategray snow
193
205
  springgreen steelblue tan teal thistle tomato
194
206
  turquoise violet wheat white whitesmoke yellow yellowgreen
207
+ rebeccapurple
195
208
  )
196
209
  end
197
210
 
@@ -245,6 +258,10 @@ module Rouge
245
258
  rule %r/(true|false)/i, Name::Constant
246
259
  rule %r/\-\-#{identifier}/, Literal
247
260
  rule %r([*+/-]), Operator
261
+ rule %r/(url(?:-prefix)?)([(])(.*?)([)])/ do
262
+ groups Name::Function, Punctuation, Str::Other, Punctuation
263
+ end
264
+
248
265
  rule(identifier) do |m|
249
266
  if self.class.colors.include? m[0].downcase
250
267
  token Name::Other
@@ -304,6 +321,8 @@ module Rouge
304
321
 
305
322
  push :stanza_value
306
323
  end
324
+
325
+ mixin :root
307
326
  end
308
327
 
309
328
  state :stanza_value do
@@ -1,9 +1,9 @@
1
1
  # -*- coding: utf-8 -*- #
2
2
 
3
+ require_relative 'cpp'
4
+
3
5
  module Rouge
4
6
  module Lexers
5
- load_lexer 'cpp.rb'
6
-
7
7
  class CUDA < Cpp
8
8
  title "CUDA"
9
9
  desc "Compute Unified Device Architecture, used for programming with NVIDIA GPU"
@@ -24,8 +24,8 @@ module Rouge
24
24
  short1 short2 short3 short4 ushort1 ushort2 ushort3 ushort4
25
25
  int1 int2 int3 int4 uint1 uint2 uint3 uint4
26
26
  long1 long2 long3 long4 ulong1 ulong2 ulong3 ulong4
27
- longlong1 longlong2 longlong3 longlong4
28
- ulonglong1 ulonglong2 ulonglong3 ulonglong4
27
+ longlong1 longlong2 longlong3 longlong4
28
+ ulonglong1 ulonglong2 ulonglong3 ulonglong4
29
29
  float1 float2 float3 float4 double1 double2 double3 double4
30
30
  dim3
31
31
  ))
@@ -1,10 +1,10 @@
1
1
  # -*- coding: utf-8 -*- #
2
2
  # frozen_string_literal: true
3
3
 
4
+ require_relative 'python'
5
+
4
6
  module Rouge
5
7
  module Lexers
6
- load_lexer 'python.rb'
7
-
8
8
  class Cython < Python
9
9
  title "Cython"
10
10
  desc "Cython and Pyrex source code (cython.org)"
@@ -25,13 +25,20 @@ module Rouge
25
25
  end
26
26
 
27
27
  def self.c_keywords
28
- @ckeywords ||= %w(
28
+ @c_keywords ||= Set.new %w(
29
29
  public readonly extern api inline enum union
30
30
  )
31
31
  end
32
32
 
33
+ def self.builtins
34
+ @builtins ||= super + %w(python_call)
35
+ end
36
+
33
37
  identifier = /[a-z_]\w*/i
34
- dotted_identifier = /[a-z_.][\w.]*/i
38
+
39
+ prepend :from_import do
40
+ rule %r/cimport\b/, Keyword::Namespace, :pop!
41
+ end
35
42
 
36
43
  prepend :root do
37
44
  rule %r/cp?def|ctypedef/ do
@@ -40,54 +47,16 @@ module Rouge
40
47
  push :c_start
41
48
  end
42
49
 
43
- rule %r/(from)((?:\\\s|\s)+)(#{dotted_identifier})((?:\\\s|\s)+)(cimport)/ do
44
- groups Keyword::Namespace,
45
- Text,
46
- Name::Namespace,
47
- Text,
48
- Keyword::Namespace
49
- end
50
-
51
- rule %r/(cimport)(\s+)(#{dotted_identifier})/ do
52
- groups Keyword::Namespace, Text, Name::Namespace
53
- end
50
+ rule %r/cimport\b/, Keyword::Namespace, :import
54
51
 
55
52
  rule %r/(struct)((?:\\\s|\s)+)/ do
56
53
  groups Keyword, Text
57
54
  push :classname
58
55
  end
59
56
 
60
- mixin :func_call_fix
61
-
62
57
  rule %r/[(,]/, Punctuation, :c_start
63
58
  end
64
59
 
65
- prepend :classname do
66
- rule %r/(?:\\\s|\s)+/, Text
67
- end
68
-
69
- prepend :funcname do
70
- rule %r/(?:\\\s|\s)+/, Text
71
- end
72
- # This is a fix for the way that function calls are lexed in the Python
73
- # lexer. This should be moved to the Python lexer once confirmed that it
74
- # does not cause any regressions.
75
- state :func_call_fix do
76
- rule %r/#{identifier}(?=\()/ do |m|
77
- if self.class.keywords.include? m[0]
78
- token Keyword
79
- elsif self.class.exceptions.include? m[0]
80
- token Name::Builtin
81
- elsif self.class.builtins.include? m[0]
82
- token Name::Builtin
83
- elsif self.class.builtins_pseudo.include? m[0]
84
- token Name::Builtin::Pseudo
85
- else
86
- token Name::Function
87
- end
88
- end
89
- end
90
-
91
60
  # The Cython lexer adds three states to those already in the Python lexer.
92
61
  # Calls to `cdef`, `cpdef` and `ctypedef` move the lexer into the :c_start
93
62
  # state. The primary purpose of this state is to highlight datatypes. Once
@@ -97,7 +66,7 @@ module Rouge
97
66
  # have moved out of a C block.
98
67
 
99
68
  state :c_start do
100
- rule %r/[^\S\n]+/, Text
69
+ mixin :inline_whitespace
101
70
 
102
71
  rule %r/cp?def|ctypedef/, Keyword
103
72
 
@@ -106,16 +75,16 @@ module Rouge
106
75
  # This rule matches identifiers that could be type declarations. The
107
76
  # lookahead matches (1) pointers, (2) arrays and (3) variable names.
108
77
  rule %r/#{identifier}(?=(?:\*+)|(?:[ \t]*\[)|(?:[ \t]+\w))/ do |m|
109
- if self.class.keywords.include? m[0]
78
+ if self.class.keywords.include?(m[0])
110
79
  token Keyword
111
80
  pop!
112
- elsif %w(def).include? m[0]
81
+ elsif m[0] == 'def'
113
82
  token Keyword
114
83
  goto :funcname
115
- elsif %w(struct class).include? m[0]
116
- token Keyword::Reserved
84
+ elsif %w(struct class).include?(m[0])
85
+ token Keyword
117
86
  goto :classname
118
- elsif self.class.c_keywords.include? m[0]
87
+ elsif self.class.c_keywords.include?(m[0])
119
88
  token Keyword::Reserved
120
89
  else
121
90
  token Keyword::Type
@@ -144,7 +113,14 @@ module Rouge
144
113
  end
145
114
  end
146
115
 
147
- rule(//) { @indentation = nil; reset_stack }
116
+ rule(//) do
117
+ @indentation = nil
118
+ # pop c_indent
119
+ pop!
120
+
121
+ # replace :c_definitions with :newline
122
+ goto :newline
123
+ end
148
124
  end
149
125
  end
150
126
  end
@@ -58,7 +58,7 @@ module Rouge
58
58
  schar = /(?:[^\\"\n\r]|\\["'ntr\\0])/
59
59
  uchar = /(?:\\u#{hex_digit}{4})/
60
60
 
61
- ## IMPORTANT: Rules are ordered, which allows later rules to be
61
+ ## IMPORTANT: Rules are ordered, which allows later rules to be
62
62
  ## simpler than they would otherwise be
63
63
  state :root do
64
64
  rule %r(/\*), Comment::Multiline, :comment
@@ -45,6 +45,8 @@ module Rouge
45
45
  rule %r(/\*.*?\*/)m, Comment::Multiline
46
46
  rule %r/"/, Str, :dqs
47
47
  rule %r/'/, Str, :sqs
48
+ rule %r/r""".*?"""/m, Str::Other
49
+ rule %r/r'''.*?'''/m, Str::Other
48
50
  rule %r/r"[^"]*"/, Str::Other
49
51
  rule %r/r'[^']*'/, Str::Other
50
52
  rule %r/##{id}*/i, Str::Symbol
@@ -124,7 +124,7 @@ module Rouge
124
124
  rule %r/#{id}(?=\s*[(])/, Name::Function
125
125
  rule id do |m|
126
126
  name = m[0].upcase
127
-
127
+
128
128
  if self.class.sql_keywords.include? name
129
129
  token Keyword
130
130
  else
@@ -1,10 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'set'
3
+ require_relative 'yaml'
4
+
4
5
  module Rouge
5
6
  module Lexers
6
- load_lexer 'yaml.rb'
7
-
8
7
  class Digdag < YAML
9
8
  title 'digdag'
10
9
  desc 'A simple, open source, multi-cloud workflow engine (https://www.digdag.io/)'
@@ -0,0 +1,109 @@
1
+ # -*- coding: utf-8 -*- #
2
+ # frozen_string_literal: true
3
+
4
+ module Rouge
5
+ module Lexers
6
+ class Dylan < RegexLexer
7
+ title 'Dylan'
8
+ desc 'Dylan Language (https://opendylan.org)'
9
+ tag 'dylan'
10
+ filenames '*.dylan'
11
+
12
+ # Definitions from the Dylan Reference Manual
13
+ # see:
14
+ # https://opendylan.org/books/drm/Modules
15
+ # https://opendylan.org/books/drm/Conditional_Execution
16
+ # https://opendylan.org/books/drm/Statement_Macros
17
+ reserved_words = Set.new %w(
18
+ begin block case class constant create define domain else
19
+ end exception for function generic handler if let library local
20
+ macro method module otherwise select unless until variable while
21
+ )
22
+
23
+ hash_word = Set.new %w(#t #f #next #rest #key #all-keys #include)
24
+ operators = Set.new %w(+ - * / ^ = == ~ ~= ~== < <= > >= & | :=)
25
+
26
+ state :root do
27
+ rule %r/^[\w.-]+:/, Comment::Preproc, :header
28
+ rule %r/\s+/, Text::Whitespace
29
+ rule(%r//) { goto :main }
30
+ end
31
+
32
+ # see https://opendylan.org/books/drm/Dylan_Interchange_Format
33
+ state :header do
34
+ rule(/.*?$/) { token Comment; goto :header_value }
35
+ end
36
+
37
+ state :header_value do
38
+ # line continuations are defined as any line that starts with whitespace
39
+ rule %r/^[ \t]+.*?$/, Comment
40
+ rule %r/\n+/, Comment
41
+ rule(//) { pop! }
42
+ end
43
+
44
+ state :main do
45
+ # Comments
46
+ rule %r(//.*?$), Comment::Single
47
+ rule %r(/\*.*?\*/)m, Comment::Multiline
48
+ rule %r/\s+/, Text::Whitespace
49
+
50
+ # Keywords
51
+ rule %r/\w+/ do |m|
52
+ if reserved_words.include?(m[0])
53
+ token Keyword
54
+ elsif hash_word.include?(m[0])
55
+ token Keyword::Constant
56
+ else
57
+ fallthrough!
58
+ end
59
+ end
60
+
61
+ rule %r/#(t|f|next|rest|key|all-keys|include)\b/, Keyword::Constant
62
+
63
+ # Numbers
64
+ rule %r([+-]?\d+/\d+), Literal::Number::Other
65
+ rule %r/[+-]?\d*[.]\d+(?:e[+-]?\d+)?/i, Literal::Number::Float
66
+ rule %r/[+-]?\d+[.]\d*(?:e[+-]?\d+)?/i, Literal::Number::Float
67
+ rule %r/[+-]\d+(?:e[+-]?\d+)?/i, Literal::Number::Float
68
+ rule %r/#b[01]+/, Literal::Number::Bin
69
+ rule %r/#o[0-7]+/, Literal::Number::Oct
70
+ rule %r/[+-]?[0-9]+/, Literal::Number::Integer
71
+ rule %r/#x[0-9a-f]+/i, Literal::Number::Hex
72
+
73
+ # Operators and punctuation
74
+ rule %r/::|=>|#[(\[#]|[.][.][.]|[(),.;\[\]{}=?]/, Punctuation
75
+
76
+ word_re = %r([\w!&*<>|^\$%@][\w!&*<>|^\$%@=/?~+-]*|[+-~])
77
+
78
+ rule %r/\\#{word_re}/, Str::Symbol
79
+
80
+ rule word_re do |m|
81
+ word = m[0]
82
+ if operators.include?(word)
83
+ token Operator
84
+ elsif word.start_with?('<') && word.end_with?('>')
85
+ token Name::Class
86
+ elsif word.start_with?('*') && word.end_with?('*')
87
+ token Name::Variable::Instance
88
+ elsif word.start_with?('$')
89
+ token Name::Constant
90
+ else
91
+ token Name
92
+ end
93
+ end
94
+
95
+ rule %r/:/, Operator # For 'constrained names'
96
+ # Strings, characters and whitespace
97
+ rule %r/"/, Str::Double, :dq
98
+ rule %r/'([^\\']|(\\[\\'abefnrt0])|(\\[0-9a-f]+))'/, Str::Char
99
+ end
100
+
101
+ state :dq do
102
+ rule %r/\\[\\'"abefnrt0]/, Str::Escape
103
+ rule %r/\\<\h+>/, Str::Escape
104
+ rule %r/[^\\"]+/, Str::Double
105
+ rule %r/"/, Str::Double, :pop!
106
+ end
107
+ end
108
+ end
109
+ end
@@ -42,9 +42,9 @@ module Rouge
42
42
  row rowdiff sample set sin sinh sizeof soapcall sort sorted sqrt
43
43
  stepped stored sum table tan tanh thisnode topn tounicode toxml
44
44
  transfer transform trim truncate typeof ungroup unicodeorder variance
45
- which workunit xmldecode xmlencode xmltext xmlunicode apply assert
46
- build buildindex evaluate fail keydiff keypatch loadxml nothor notify
47
- output parallel sequential soapcall wait
45
+ which workunit xmldecode xmlencode xmltext xmlunicode assert
46
+ build buildindex fail keydiff keypatch loadxml nothor notify
47
+ output parallel sequential wait
48
48
  )
49
49
  end
50
50
 
@@ -99,43 +99,48 @@ module Rouge
99
99
 
100
100
  state :sigil_strings do
101
101
  # ~-sigiled strings
102
- # ~(abc), ~[abc], ~<abc>, ~|abc|, ~r/abc/, etc
102
+ # ~r(abc), ~r[abc], ~r<abc>, ~r|abc|, ~r/abc/, etc
103
103
  # Cribbed and adjusted from Ruby lexer
104
104
  delimiter_map = { '{' => '}', '[' => ']', '(' => ')', '<' => '>' }
105
- # Match a-z for custom sigils too
106
- sigil_opens = Regexp.union(delimiter_map.keys + %w(| / ' "))
107
- rule %r/~([A-Za-z])?(#{sigil_opens})/ do |m|
105
+ sigil_opens = Regexp.union(delimiter_map.keys + [%r/"{3}/] + %w(| / ' "))
106
+ rule %r/~([a-z]|[A-Z][A-Z0-9]*)(#{sigil_opens})/ do |m|
108
107
  open = Regexp.escape(m[2])
109
108
  close = Regexp.escape(delimiter_map[m[2]] || m[2])
110
- interp = /[SRCW]/ === m[1]
109
+ interp = /^[srcw]$/ === m[1]
111
110
  toktype = Str::Other
112
111
 
113
112
  puts " open: #{open.inspect}" if @debug
114
113
  puts " close: #{close.inspect}" if @debug
115
114
 
116
115
  # regexes
117
- if 'Rr'.include? m[1]
116
+ if m[1] == 'r' || m[1] == 'R'
118
117
  toktype = Str::Regex
119
118
  push :regex_flags
120
119
  end
121
120
 
122
- if 'Ww'.include? m[1]
121
+ if m[1] == 'w' || m[1] == 'W'
123
122
  push :list_flags
124
123
  end
125
124
 
125
+ if open == '"""'
126
+ toktype = Str::Doc
127
+ end
128
+
126
129
  token toktype
127
130
 
128
131
  push do
129
132
  rule %r/#{close}/, toktype, :pop!
130
133
 
131
- if interp
134
+ if toktype == Str::Doc
135
+ rule %r/(?:.|\n)*?"""/, toktype, :pop!
136
+ elsif interp
132
137
  mixin :interpoling
133
138
  rule %r/#/, toktype
134
139
  else
135
140
  rule %r/[\\#]/, toktype
136
141
  end
137
142
 
138
- uniq_chars = [open, close].uniq.join
143
+ uniq_chars = [open, close].uniq.join.squeeze
139
144
  rule %r/[^##{uniq_chars}\\]+/m, toktype
140
145
  end
141
146
  end
@@ -33,7 +33,7 @@ module Rouge
33
33
  end
34
34
 
35
35
  def stream_tokens(str, &b)
36
- stream = StringScanner.new(str)
36
+ stream = StringScanner.new(str, fixed_anchor: true)
37
37
 
38
38
  loop do
39
39
  if stream.scan(to_start_regex)
@@ -25,7 +25,7 @@ module Rouge
25
25
  do unless* if* loop bi-curry* drop when* assert= retainstack
26
26
  assert? -rot execute 2bi@ 2tri@ boa with either? 3drop bi
27
27
  curry? datastack until 3dip over 3curry tri-curry* tri-curry@
28
- swap and 2nip throw bi-curry (clone) hashcode* compose 2dip if
28
+ swap and 2nip throw bi-curry (clone) hashcode* compose 2dip
29
29
  3tri unless compose? tuple keep 2curry equal? assert tri 2drop
30
30
  most <wrapper> boolean? identity-hashcode identity-tuple?
31
31
  null new dip bi-curry@ rot xor identity-tuple boolean
@@ -1,10 +1,10 @@
1
1
  # -*- coding: utf-8 -*- #
2
2
  # frozen_string_literal: true
3
3
 
4
+ require_relative 'cpp'
5
+
4
6
  module Rouge
5
7
  module Lexers
6
- load_lexer 'cpp.rb'
7
-
8
8
  class FreeFEM < Cpp
9
9
  title "FreeFEM"
10
10
  desc "The FreeFEM programming language (freefem.org)"
@@ -245,7 +245,7 @@ module Rouge
245
245
  # `(type /* optional whitespace */ var_name /* optional whitespace */)`
246
246
  # Note: Only the token for type is produced here.
247
247
  rule %r{
248
- (^#{id})
248
+ (#{id})
249
249
  (#{ws}+)
250
250
  (#{id})
251
251
  }mx do |m|