rouge 3.27.0 → 3.29.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -1
  3. data/lib/rouge/demos/brainfuck +1 -1
  4. data/lib/rouge/demos/brightscript +1 -1
  5. data/lib/rouge/demos/bsl +1 -1
  6. data/lib/rouge/demos/cfscript +1 -1
  7. data/lib/rouge/demos/coq +1 -1
  8. data/lib/rouge/demos/csvs +1 -1
  9. data/lib/rouge/demos/datastudio +0 -1
  10. data/lib/rouge/demos/ecl +0 -1
  11. data/lib/rouge/demos/erlang +1 -1
  12. data/lib/rouge/demos/fluent +13 -0
  13. data/lib/rouge/demos/fsharp +1 -1
  14. data/lib/rouge/demos/glsl +1 -1
  15. data/lib/rouge/demos/haxe +0 -1
  16. data/lib/rouge/demos/idris +13 -0
  17. data/lib/rouge/demos/lean +8 -0
  18. data/lib/rouge/demos/nesasm +1 -1
  19. data/lib/rouge/demos/opentype_feature_file +0 -1
  20. data/lib/rouge/demos/plsql +2 -0
  21. data/lib/rouge/demos/smarty +0 -1
  22. data/lib/rouge/demos/stan +13 -0
  23. data/lib/rouge/demos/stata +14 -0
  24. data/lib/rouge/demos/syzlang +15 -0
  25. data/lib/rouge/demos/syzprog +8 -0
  26. data/lib/rouge/formatters/html_inline.rb +0 -1
  27. data/lib/rouge/lexer.rb +2 -2
  28. data/lib/rouge/lexers/apache/keywords.rb +1 -1
  29. data/lib/rouge/lexers/c.rb +12 -2
  30. data/lib/rouge/lexers/console.rb +1 -1
  31. data/lib/rouge/lexers/cpp.rb +6 -4
  32. data/lib/rouge/lexers/cypher.rb +8 -0
  33. data/lib/rouge/lexers/dart.rb +8 -8
  34. data/lib/rouge/lexers/docker.rb +4 -0
  35. data/lib/rouge/lexers/eiffel.rb +0 -1
  36. data/lib/rouge/lexers/fluent.rb +74 -0
  37. data/lib/rouge/lexers/gherkin/keywords.rb +1 -1
  38. data/lib/rouge/lexers/handlebars.rb +1 -1
  39. data/lib/rouge/lexers/hcl.rb +1 -0
  40. data/lib/rouge/lexers/hylang.rb +0 -1
  41. data/lib/rouge/lexers/idris.rb +210 -0
  42. data/lib/rouge/lexers/jsx.rb +1 -2
  43. data/lib/rouge/lexers/kotlin.rb +3 -1
  44. data/lib/rouge/lexers/lasso/keywords.rb +1 -1
  45. data/lib/rouge/lexers/lean.rb +164 -0
  46. data/lib/rouge/lexers/llvm/keywords.rb +1 -1
  47. data/lib/rouge/lexers/lua/keywords.rb +1 -1
  48. data/lib/rouge/lexers/mathematica/keywords.rb +1 -1
  49. data/lib/rouge/lexers/matlab/keywords.rb +1 -1
  50. data/lib/rouge/lexers/matlab.rb +3 -2
  51. data/lib/rouge/lexers/ocl.rb +0 -1
  52. data/lib/rouge/lexers/php/keywords.rb +1 -1
  53. data/lib/rouge/lexers/plsql.rb +578 -0
  54. data/lib/rouge/lexers/prometheus.rb +0 -1
  55. data/lib/rouge/lexers/python.rb +3 -1
  56. data/lib/rouge/lexers/q.rb +0 -1
  57. data/lib/rouge/lexers/rust.rb +9 -5
  58. data/lib/rouge/lexers/sparql.rb +5 -4
  59. data/lib/rouge/lexers/sqf/keywords.rb +1 -1
  60. data/lib/rouge/lexers/stan.rb +451 -0
  61. data/lib/rouge/lexers/stata.rb +165 -0
  62. data/lib/rouge/lexers/supercollider.rb +0 -1
  63. data/lib/rouge/lexers/syzlang.rb +317 -0
  64. data/lib/rouge/lexers/syzprog.rb +122 -0
  65. data/lib/rouge/lexers/tap.rb +0 -1
  66. data/lib/rouge/lexers/toml.rb +8 -6
  67. data/lib/rouge/lexers/tsx.rb +0 -1
  68. data/lib/rouge/lexers/tulip.rb +0 -1
  69. data/lib/rouge/lexers/viml/keywords.rb +1 -1
  70. data/lib/rouge/version.rb +1 -1
  71. metadata +19 -3
@@ -22,10 +22,13 @@ module Rouge
22
22
 
23
23
  def self.keywords
24
24
  @keywords ||= %w(
25
- as assert async await break crate const continue copy do drop dyn else enum extern
26
- fail false fn for if impl let log loop macro match mod move mut priv pub pure
27
- ref return self Self static struct super true try trait type union unsafe use
28
- where while yield box
25
+ as async await break const continue crate dyn else enum extern false
26
+ fn for if impl in let log loop match mod move mut pub ref return self
27
+ Self static struct super trait true type unsafe use where while
28
+ abstract become box do final macro
29
+ override priv typeof unsized virtual
30
+ yield try
31
+ union
29
32
  )
30
33
  end
31
34
 
@@ -212,7 +215,8 @@ module Rouge
212
215
 
213
216
  state :has_literals do
214
217
  # constants
215
- rule %r/\b(?:true|false|nil)\b/, Keyword::Constant
218
+ rule %r/\b(?:true|false)\b/, Keyword::Constant
219
+
216
220
  # characters/bytes
217
221
  rule %r(
218
222
  b?' (?: #{escapes} | [^\\] ) '
@@ -41,16 +41,17 @@ module Rouge
41
41
  rule %r('''), Str::Single, :string_single_literal
42
42
  rule %r('), Str::Single, :string_single
43
43
 
44
- rule %r([$?]\w+), Name::Variable
45
- rule %r((\w*:)(\w+)?) do |m|
44
+ rule %r([$?][[:word:]]+), Name::Variable
45
+ rule %r(([[:word:]-]*)(:)([[:word:]-]+)?) do |m|
46
46
  token Name::Namespace, m[1]
47
- token Str::Symbol, m[2]
47
+ token Operator, m[2]
48
+ token Str::Symbol, m[3]
48
49
  end
49
50
  rule %r(<[^>]*>), Name::Namespace
50
51
  rule %r(true|false)i, Keyword::Constant
51
52
  rule %r/a\b/, Keyword
52
53
 
53
- rule %r([A-Z]\w+\b)i do |m|
54
+ rule %r([A-Z][[:word:]]+\b)i do |m|
54
55
  if self.class.builtins.include? m[0].upcase
55
56
  token Name::Builtin
56
57
  elsif self.class.keywords.include? m[0].upcase
@@ -9,4 +9,4 @@ module Rouge
9
9
  end
10
10
  end
11
11
  end
12
- end
12
+ end
@@ -0,0 +1,451 @@
1
+ # -*- coding: utf-8 -*- #
2
+ # frozen_string_literal: true
3
+
4
+ module Rouge
5
+ module Lexers
6
+ class Stan < RegexLexer
7
+ title "Stan"
8
+ desc 'Stan Modeling Language (mc-stan.org)'
9
+ tag 'stan'
10
+ filenames '*.stan', '*.stanfunctions'
11
+
12
+ # optional comment or whitespace
13
+ WS = %r((?:\s|//.*?\n|/[*].*?[*]/)+)
14
+ ID = /[a-zA-Z_][a-zA-Z0-9_]*/
15
+ RT = /(?:(?:[a-z_]\s*(?:\[[0-9, ]\])?)\s+)*/
16
+ OP = Regexp.new([
17
+ # Assignment operators
18
+ "=",
19
+
20
+ # Comparison operators
21
+ "<", "<=", ">", ">=", "==", "!=",
22
+
23
+ # Boolean operators
24
+ "!", "&&", "\\|\\|",
25
+
26
+ # Real-valued arithmetic operators
27
+ "\\+", "-", "\\*", "/", "\\^",
28
+
29
+ # Transposition operator
30
+ "'",
31
+
32
+ # Elementwise functions
33
+ "\\.\\+", "\\.-", "\\.\\*", "\\./", "\\.\\^",
34
+
35
+ # Matrix division operators
36
+ "\\\\",
37
+
38
+ # Compound assignment operators
39
+ "\\+=", "-=", "\\*=", "/=", "\\.\\*=", "\\./=",
40
+
41
+ # Sampling
42
+ "~",
43
+
44
+ # Conditional operator
45
+ "\\?", ":"
46
+ ].join("|"))
47
+
48
+ def self.keywords
49
+ @keywords ||= Set.new %w(
50
+ if else while for break continue print reject return
51
+ )
52
+ end
53
+
54
+ def self.types
55
+ @types ||= Set.new %w(
56
+ int real vector ordered positive_ordered simplex unit_vector
57
+ row_vector matrix cholesky_factor_corr cholesky_factor_cov corr_matrix
58
+ cov_matrix data void complex array
59
+ )
60
+ end
61
+
62
+ def self.reserved
63
+ @reserved ||= Set.new [
64
+ # Reserved words from Stan language
65
+ "for", "in", "while", "repeat", "until", "if", "then", "else", "true",
66
+ "false", "target", "functions", "model", "data", "parameters",
67
+ "quantities", "transformed", "generated",
68
+
69
+ # Reserved names from Stan implementation
70
+ "var", "fvar", "STAN_MAJOR", "STAN_MINOR", "STAN_PATCH",
71
+ "STAN_MATH_MAJOR", "STAN_MATH_MINOR", "STAN_MATH_PATCH",
72
+
73
+ # Reserved names from C++
74
+ "alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand",
75
+ "bitor", "bool", "break", "case", "catch", "char", "char16_t",
76
+ "char32_t", "class", "compl", "const", "constexpr", "const_cast",
77
+ "continue", "decltype", "default", "delete", "do", "double",
78
+ "dynamic_cast", "else", "enum", "explicit", "export", "extern",
79
+ "false", "float", "for", "friend", "goto", "if", "inline", "int",
80
+ "long", "mutable", "namespace", "new", "noexcept", "not", "not_eq",
81
+ "nullptr", "operator", "or", "or_eq", "private", "protected",
82
+ "public", "register", "reinterpret_cast", "return", "short", "signed",
83
+ "sizeof", "static", "static_assert", "static_cast", "struct",
84
+ "switch", "template", "this", "thread_local", "throw", "true", "try",
85
+ "typedef", "typeid", "typename", "union", "unsigned", "using",
86
+ "virtual", "void", "volatile", "wchar_t", "while", "xor", "xor_eq"
87
+ ]
88
+ end
89
+
90
+ def self.builtin_functions
91
+ @builtin_functions ||= Set.new [
92
+ # Integer-Valued Basic Functions
93
+
94
+ ## Absolute functions
95
+ "abs", "int_step",
96
+
97
+ ## Bound functions
98
+ "min", "max",
99
+
100
+ ## Size functions
101
+ "size",
102
+
103
+ # Real-Valued Basic Functions
104
+
105
+ ## Log probability function
106
+ "target", "get_lp",
107
+
108
+ ## Logical functions
109
+ "step", "is_inf", "is_nan",
110
+
111
+ ## Step-like functions
112
+ "fabs", "fdim", "fmin", "fmax", "fmod", "floor", "ceil", "round",
113
+ "trunc",
114
+
115
+ ## Power and logarithm functions
116
+ "sqrt", "cbrt", "square", "exp", "exp2", "log", "log2", "log10",
117
+ "pow", "inv", "inv_sqrt", "inv_square",
118
+
119
+ ## Trigonometric functions
120
+ "hypot", "cos", "sin", "tan", "acos", "asin", "atan", "atan2",
121
+
122
+ ## Hyperbolic trigonometric functions
123
+ "cosh", "sinh", "tanh", "acosh", "asinh", "atanh",
124
+
125
+ ## Link functions
126
+ "logit", "inv_logit", "inv_cloglog",
127
+
128
+ ## Probability-related functions
129
+ "erf", "erfc", "Phi", "inv_Phi", "Phi_approx", "binary_log_loss",
130
+ "owens_t",
131
+
132
+ ## Combinatorial functions
133
+ "beta", "inc_beta", "lbeta", "tgamma", "lgamma", "digamma",
134
+ "trigamma", "lmgamma", "gamma_p", "gamma_q",
135
+ "binomial_coefficient_log", "choose", "bessel_first_kind",
136
+ "bessel_second_kind", "modified_bessel_first_kind",
137
+ "log_modified_bessel_first_kind", "modified_bessel_second_kind",
138
+ "falling_factorial", "lchoose", "log_falling_factorial",
139
+ "rising_factorial", "log_rising_factorial",
140
+
141
+ ## Composed functions
142
+ "expm1", "fma", "multiply_log", "ldexp", "lmultiply", "log1p",
143
+ "log1m", "log1p_exp", "log1m_exp", "log_diff_exp", "log_mix",
144
+ "log_sum_exp", "log_inv_logit", "log_inv_logit_diff",
145
+ "log1m_inv_logit",
146
+
147
+ ## Special functions
148
+ "lambert_w0", "lambert_wm1",
149
+
150
+ # Complex-Valued Basic Functions
151
+
152
+ ## Complex constructors and accessors
153
+ "to_complex", "get_real", "get_imag",
154
+
155
+ ## Complex special functions
156
+ "arg", "norm", "conj", "proj", "polar",
157
+
158
+ # Array Operations
159
+
160
+ ## Reductions
161
+ "sum", "prod", "log_sum_exp", "mean", "variance", "sd", "distance",
162
+ "squared_distance", "quantile",
163
+
164
+ ## Array size and dimension function
165
+ "dims", "num_elements",
166
+
167
+ ## Array broadcasting
168
+ "rep_array",
169
+
170
+ ## Array concatenation
171
+ "append_array",
172
+
173
+ ## Sorting functions
174
+ "sort_asc", "sort_desc", "sort_indices_asc", "sort_indices_desc",
175
+ "rank",
176
+
177
+ ## Reversing functions
178
+ "reverse",
179
+
180
+ # Matrix Operations
181
+
182
+ ## Integer-valued matrix size functions
183
+ "num_elements", "rows", "cols",
184
+
185
+ ## Dot products and specialized products
186
+ "dot_product", "columns_dot_product", "rows_dot_product", "dot_self",
187
+ "columns_dot_self", "rows_dot_self", "tcrossprod", "crossprod",
188
+ "quad_form", "quad_form_diag", "quad_form_sym", "trace_quad_form",
189
+ "trace_gen_quad_form", "multiply_lower_tri_self_transpose",
190
+ "diag_pre_multiply", "diag_post_multiply",
191
+
192
+ ## Broadcast functions
193
+ "rep_vector", "rep_row_vector", "rep_matrix",
194
+ "symmetrize_from_lower_tri",
195
+
196
+ ## Diagonal matrix functions
197
+ "add_diag", "diagonal", "diag_matrix", "identity_matrix",
198
+
199
+ ## Container construction functions
200
+ "linspaced_array", "linspaced_int_array", "linspaced_vector",
201
+ "linspaced_row_vector", "one_hot_int_array", "one_hot_array",
202
+ "one_hot_vector", "one_hot_row_vector", "ones_int_array",
203
+ "ones_array", "ones_vector", "ones_row_vector", "zeros_int_array",
204
+ "zeros_array", "zeros_vector", "zeros_row_vector", "uniform_simplex",
205
+
206
+ ## Slicing and blocking functions
207
+ "col", "row", "block", "sub_col", "sub_row", "head", "tail",
208
+ "segment",
209
+
210
+ ## Matrix concatenation
211
+ "append_col", "append_row",
212
+
213
+ ## Special matrix functions
214
+ "softmax", "log_softmax", "cumulative_sum",
215
+
216
+ ## Covariance functions
217
+ "cov_exp_quad",
218
+
219
+ ## Linear algebra functions and solvers
220
+ "mdivide_left_tri_low", "mdivide_right_tri_low", "mdivide_left_spd",
221
+ "mdivide_right_spd", "matrix_exp", "matrix_exp_multiply",
222
+ "scale_matrix_exp_multiply", "matrix_power", "trace", "determinant",
223
+ "log_determinant", "inverse", "inverse_spd", "chol2inv",
224
+ "generalized_inverse", "eigenvalues_sym", "eigenvectors_sym",
225
+ "qr_thin_Q", "qr_thin_R", "qr_Q", "qr_R", "cholseky_decompose",
226
+ "singular_values", "svd_U", "svd_V",
227
+
228
+ # Sparse Matrix Operations
229
+
230
+ ## Conversion functions
231
+ "csr_extract_w", "csr_extract_v", "csr_extract_u",
232
+ "csr_to_dense_matrix",
233
+
234
+ ## Sparse matrix arithmetic
235
+ "csr_matrix_times_vector",
236
+
237
+ # Mixed Operations
238
+ "to_matrix", "to_vector", "to_row_vector", "to_array_2d",
239
+ "to_array_1d",
240
+
241
+ # Higher-Order Functions
242
+
243
+ ## Algebraic equation solver
244
+ "algebra_solver", "algebra_solver_newton",
245
+
246
+ ## Ordinary differential equation
247
+ "ode_rk45", "ode_rk45_tol", "ode_ckrk", "ode_ckrk_tol", "ode_adams",
248
+ "ode_adams_tol", "ode_bdf", "ode_bdf_tol", "ode_adjoint_tol_ctl",
249
+
250
+ ## 1D integrator
251
+ "integrate_1d",
252
+
253
+ ## Reduce-sum function
254
+ "reduce_sum", "reduce_sum_static",
255
+
256
+ ## Map-rect function
257
+ "map_rect",
258
+
259
+ # Deprecated Functions
260
+ "integrate_ode_rk45", "integrate_ode", "integrate_ode_adams",
261
+ "integrate_ode_bdf",
262
+
263
+ # Hidden Markov Models
264
+ "hmm_marginal", "hmm_latent_rng", "hmm_hidden_state_prob"
265
+ ]
266
+ end
267
+
268
+ def self.distributions
269
+ @distributions ||= Set.new(
270
+ [
271
+ # Discrete Distributions
272
+
273
+ ## Binary Distributions
274
+ "bernoulli", "bernoulli_logit", "bernoulli_logit_glm",
275
+
276
+ ## Bounded Discrete Distributions
277
+ "binomial", "binomial_logit", "beta_binomial", "hypergeometric",
278
+ "categorical", "categorical_logit_glm", "discrete_range",
279
+ "ordered_logistic", "ordered_logistic_glm", "ordered_probit",
280
+
281
+ ## Unbounded Discrete Distributions
282
+ "neg_binomial", "neg_binomial_2", "neg_binomial_2_log",
283
+ "neg_binomial_2_log_glm", "poisson", "poisson_log",
284
+ "poisson_log_glm",
285
+
286
+ ## Multivariate Discrete Distributions
287
+ "multinomial", "multinomial_logit",
288
+
289
+ # Continuous Distributions
290
+
291
+ ## Unbounded Continuous Distributions
292
+ "normal", "std_normal", "normal_id_glm", "exp_mod_normal",
293
+ "skew_normal", "student_t", "cauchy", "double_exponential",
294
+ "logistic", "gumbel", "skew_double_exponential",
295
+
296
+ ## Positive Continuous Distributions
297
+ "lognormal", "chi_square", "inv_chi_square",
298
+ "scaled_inv_chi_square", "exponential", "gamma", "inv_gamma",
299
+ "weibull", "frechet", "rayleigh",
300
+
301
+ ## Positive Lower-Bounded Distributions
302
+ "pareto", "pareto_type_2", "wiener",
303
+
304
+ ## Continuous Distributions on [0, 1]
305
+ "beta", "beta_proportion",
306
+
307
+ ## Circular Distributions
308
+ "von_mises",
309
+
310
+ ## Bounded Continuous Distributions
311
+ "uniform",
312
+
313
+ ## Distributions over Unbounded Vectors
314
+ "multi_normal", "multi_normal_prec", "multi_normal_cholesky",
315
+ "multi_gp", "multi_gp_cholesky", "multi_student_t",
316
+ "gaussian_dlm_obs",
317
+
318
+ ## Simplex Distributions
319
+ "dirichlet",
320
+
321
+ ## Correlation Matrix Distributions
322
+ "lkj_corr", "lkj_corr_cholesky",
323
+
324
+ ## Covariance Matrix Distributions
325
+ "wishart", "inv_wishart"
326
+ ].product([
327
+ "", "_lpmf", "_lupmf", "_lpdf", "_lcdf", "_lccdf", "_rng", "_log",
328
+ "_cdf_log", "_ccdf_log"
329
+ ]).map {|s| "#{s[0]}#{s[1]}"}
330
+ )
331
+ end
332
+
333
+ def self.constants
334
+ @constants ||= Set.new [
335
+ # Mathematical constants
336
+ "pi", "e", "sqrt2", "log2", "log10",
337
+
338
+ # Special values
339
+ "not_a_number", "positive_infinity", "negative_infinity",
340
+ "machine_precision"
341
+ ]
342
+ end
343
+
344
+ state :root do
345
+ mixin :whitespace
346
+ rule %r/#include/, Comment::Preproc, :include
347
+ rule %r/#.*$/, Generic::Deleted
348
+ rule %r(
349
+ functions
350
+ |(?:transformed\s+)?data
351
+ |(?:transformed\s+)?parameters
352
+ |model
353
+ |generated\s+quantities
354
+ )x, Name::Namespace
355
+ rule %r(\{), Punctuation, :bracket_scope
356
+ mixin :scope
357
+ end
358
+
359
+ state :include do
360
+ rule %r((\s+)(\S+)(\s*)) do |m|
361
+ token Text, m[1]
362
+ token Comment::PreprocFile, m[2]
363
+ token Text, m[3]
364
+ pop!
365
+ end
366
+ end
367
+
368
+ state :whitespace do
369
+ rule %r(\n+)m, Text
370
+ rule %r(//(\\.|.)*?$), Comment::Single
371
+ mixin :inline_whitespace
372
+ end
373
+
374
+ state :inline_whitespace do
375
+ rule %r([ \t\r]+), Text
376
+ rule %r(/(\\\n)?[*].*?[*](\\\n)?/)m, Comment::Multiline
377
+ end
378
+
379
+ state :statements do
380
+ mixin :whitespace
381
+ rule %r/#include/, Comment::Preproc, :include
382
+ rule %r/#.*$/, Generic::Deleted
383
+ rule %r("), Str, :string
384
+ rule %r(
385
+ (
386
+ ((\d+[.]\d*|[.]?\d+)e[+-]?\d+|\d*[.]\d+|\d+)
387
+ (#{WS})[+-](#{WS})
388
+ ((\d+[.]\d*|[.]?\d+)e[+-]?\d+|\d*[.]\d+|\d+)i
389
+ )
390
+ |((\d+[.]\d*|[.]?\d+)e[+-]?\d+|\d*[.]\d+|\d+)i
391
+ |((\d+[.]\d*|[.]?\d+)e[+-]?\d+|\d*[.]\d+)
392
+ )mx, Num::Float
393
+ rule %r/\d+/, Num::Integer
394
+ rule %r(\*/), Error
395
+ rule OP, Operator
396
+ rule %r([\[\],.;]), Punctuation
397
+ rule %r([|](?![|])), Punctuation
398
+ rule %r(T\b), Keyword::Reserved
399
+ rule %r((lower|upper)\b), Name::Attribute
400
+ rule ID do |m|
401
+ name = m[0]
402
+
403
+ if self.class.keywords.include? name
404
+ token Keyword
405
+ elsif self.class.types.include? name
406
+ token Keyword::Type
407
+ elsif self.class.reserved.include? name
408
+ token Keyword::Reserved
409
+ else
410
+ token Name::Variable
411
+ end
412
+ end
413
+ end
414
+
415
+ state :scope do
416
+ mixin :whitespace
417
+ rule %r(
418
+ (#{RT}) # Return type
419
+ (#{ID}) # Function name
420
+ (?=\([^;]*?\)) # Signature or arguments
421
+ )mx do |m|
422
+ recurse m[1]
423
+
424
+ name = m[2]
425
+ if self.class.builtin_functions.include? name
426
+ token Name::Builtin, name
427
+ elsif self.class.distributions.include? name
428
+ token Name::Builtin, name
429
+ elsif self.class.constants.include? name
430
+ token Keyword::Constant
431
+ else
432
+ token Name::Function, name
433
+ end
434
+ end
435
+ rule %r(\{), Punctuation, :bracket_scope
436
+ rule %r(\(), Punctuation, :parens_scope
437
+ mixin :statements
438
+ end
439
+
440
+ state :bracket_scope do
441
+ mixin :scope
442
+ rule %r(\}), Punctuation, :pop!
443
+ end
444
+
445
+ state :parens_scope do
446
+ mixin :scope
447
+ rule %r(\)), Punctuation, :pop!
448
+ end
449
+ end
450
+ end
451
+ end
@@ -0,0 +1,165 @@
1
+ # -*- coding: utf-8 -*- #
2
+ # frozen_string_literal: true
3
+
4
+ module Rouge
5
+ module Lexers
6
+ class Stata < RegexLexer
7
+ title "Stata"
8
+ desc "The Stata programming language (www.stata.com)"
9
+ tag 'stata'
10
+ filenames '*.do', '*.ado'
11
+ mimetypes 'application/x-stata', 'text/x-stata'
12
+
13
+ ###
14
+ # Stata reference manual is available online at: https://www.stata.com/features/documentation/
15
+ ###
16
+
17
+ # Partial list of common programming and estimation commands, as of Stata 16
18
+ # Note: not all abbreviations are included
19
+ KEYWORDS = %w(
20
+ do run include clear assert set mata log
21
+ by bys bysort cap capt capture char class classutil which cdir confirm new existence creturn
22
+ _datasignature discard di dis disp displ displa display ereturn error _estimates exit file open read write seek close query findfile fvexpand
23
+ gettoken java home heapmax java_heapmax icd9 icd9p icd10 icd10cm icd10pcs initialize javacall levelsof
24
+ tempvar tempname tempfile macro shift uniq dups retokenize clean sizeof posof
25
+ makecns matcproc marksample mark markout markin svymarkout matlist
26
+ accum define dissimilarity eigenvalues get rowjoinbyname rownames score svd symeigen dir list ren rename
27
+ more pause plugin call postfile _predict preserve restore program define drop end python qui quietly noi noisily _return return _rmcoll rmsg _robust
28
+ serset locale_functions locale_ui signestimationsample checkestimationsample sleep syntax sysdir adopath adosize
29
+ tabdisp timer tokenize trace unab unabcmd varabbrev version viewsource
30
+ window fopen fsave manage menu push stopbox
31
+ net from cd link search install sj stb ado update uninstall pwd ssc ls
32
+ using insheet outsheet mkmat svmat sum summ summarize
33
+ graph gr_edit twoway histogram kdensity spikeplot
34
+ mi miss missing var varname order compress append
35
+ gen gene gener genera generat generate egen replace duplicates
36
+ estimates nlcom lincom test testnl predict suest
37
+ _regress reg regr regre regres regress probit logit ivregress logistic svy gmm ivprobit ivtobit
38
+ bsample assert codebook collapse compare contract copy count cross datasignature d ds desc describe destring tostring
39
+ drawnorm edit encode decode erase expand export filefilter fillin format frame frget frlink gsort
40
+ import dbase delimited excel fred haver sas sasxport5 sasxport8 spss infile infix input insobs inspect ipolate isid
41
+ joinby label language labelbook lookfor memory mem merge mkdir mvencode notes obs odbc order outfile
42
+ pctile xtile _pctile putmata range recast recode rename group reshape rm rmdir sample save saveold separate shell snapshot sort split splitsample stack statsby sysuse
43
+ type unicode use varmanage vl webuse xpose zipfile
44
+ number keep tab table tabulate stset stcox tsset xtset
45
+ )
46
+
47
+ # Complete list of functions by name, as of Stata 16
48
+ PRIMITIVE_FUNCTIONS = %w(
49
+ abbrev abs acos acosh age age_frac asin asinh atan atan2 atanh autocode
50
+ betaden binomial binomialp binomialtail binormal birthday bofd byteorder
51
+ c _caller cauchy cauchyden cauchytail Cdhms ceil char chi2 chi2den chi2tail Chms
52
+ chop cholesky clip Clock clock clockdiff cloglog Cmdyhms Cofc cofC Cofd cofd coleqnumb
53
+ collatorlocale collatorversion colnfreeparms colnumb colsof comb cond corr cos cosh
54
+ daily date datediff datediff_frac day det dgammapda dgammapdada dgammapdadx dgammapdxdx dhms
55
+ diag diag0cnt digamma dofb dofC dofc dofh dofm dofq dofw dofy dow doy dunnettprob e el epsdouble
56
+ epsfloat exp expm1 exponential exponentialden exponentialtail
57
+ F Fden fileexists fileread filereaderror filewrite float floor fmtwidth frval _frval Ftail
58
+ gammaden gammap gammaptail get hadamard halfyear halfyearly has_eprop hh hhC hms hofd hours
59
+ hypergeometric hypergeometricp
60
+ I ibeta ibetatail igaussian igaussianden igaussiantail indexnot inlist inrange int inv invbinomial invbinomialtail
61
+ invcauchy invcauchytail invchi2 invchi2tail invcloglog invdunnettprob invexponential invexponentialtail invF
62
+ invFtail invgammap invgammaptail invibeta invibetatail invigaussian invigaussiantail invlaplace invlaplacetail
63
+ invlogistic invlogistictail invlogit invnbinomial invnbinomialtail invnchi2 invnchi2tail invnF invnFtail invnibeta invnormal invnt invnttail
64
+ invpoisson invpoissontail invsym invt invttail invtukeyprob invweibull invweibullph invweibullphtail invweibulltail irecode isleapyear issymmetric
65
+ J laplace laplaceden laplacetail ln ln1m ln1p lncauchyden lnfactorial lngamma lnigammaden lnigaussianden lniwishartden lnlaplaceden lnmvnormalden
66
+ lnnormal lnnormalden lnwishartden log log10 log1m log1p logistic logisticden logistictail logit
67
+ matmissing matrix matuniform max maxbyte maxdouble maxfloat maxint maxlong mdy mdyhms mi min minbyte mindouble minfloat minint minlong minutes
68
+ missing mm mmC mod mofd month monthly mreldif msofhours msofminutes msofseconds
69
+ nbetaden nbinomial nbinomialp nbinomialtail nchi2 nchi2den nchi2tail nextbirthday nextleapyear nF nFden nFtail nibeta
70
+ normal normalden npnchi2 npnF npnt nt ntden nttail nullmat
71
+ plural poisson poissonp poissontail previousbirthday previousleapyear qofd quarter quarterly r rbeta rbinomial rcauchy rchi2 recode
72
+ real regexm regexr regexs reldif replay return rexponential rgamma rhypergeometric rigaussian rlaplace rlogistic rnormal
73
+ round roweqnumb rownfreeparms rownumb rowsof rpoisson rt runiform runiformint rweibull rweibullph
74
+ s scalar seconds sign sin sinh smallestdouble soundex soundex_nara sqrt ss ssC strcat strdup string stritrim strlen strlower
75
+ strltrim strmatch strofreal strpos strproper strreverse strrpos strrtrim strtoname strtrim strupper subinstr subinword substr sum sweep
76
+ t tan tanh tC tc td tden th tin tm tobytes tq trace trigamma trunc ttail tukeyprob tw twithin
77
+ uchar udstrlen udsubstr uisdigit uisletter uniform ustrcompare ustrcompareex ustrfix ustrfrom ustrinvalidcnt ustrleft ustrlen ustrlower
78
+ ustrltrim ustrnormalize ustrpos ustrregexm ustrregexra ustrregexrf ustrregexs ustrreverse ustrright ustrrpos ustrrtrim ustrsortkey
79
+ ustrsortkeyex ustrtitle ustrto ustrtohex ustrtoname ustrtrim ustrunescape ustrupper ustrword ustrwordcount usubinstr usubstr
80
+ vec vecdiag week weekly weibull weibullden weibullph weibullphden weibullphtail weibulltail wofd word wordbreaklocale wordcount
81
+ year yearly yh ym yofd yq yw
82
+ )
83
+
84
+ # Note: types `str1-str2045` handled separately below
85
+ def self.type_keywords
86
+ @type_keywords ||= Set.new %w(byte int long float double str strL numeric string integer scalar matrix local global numlist varlist newlist)
87
+ end
88
+
89
+ # Stata commands used with braces. Includes all valid abbreviations for 'forvalues'.
90
+ def self.reserved_keywords
91
+ @reserved_keywords ||= Set.new %w(if else foreach forv forva forval forvalu forvalue forvalues to while in of continue break nobreak)
92
+ end
93
+
94
+ ###
95
+ # Lexer state and rules
96
+ ###
97
+ state :root do
98
+
99
+ # Pre-processor commands: #
100
+ rule %r/^\s*#.*$/, Comment::Preproc
101
+
102
+ # Hashbang comments: *!
103
+ rule %r/^\*!.*$/, Comment::Hashbang
104
+
105
+ # Single-line comment: *
106
+ rule %r/^\s*\*.*$/, Comment::Single
107
+
108
+ # Keywords: recognize only when they are the first word
109
+ rule %r/^\s*(#{KEYWORDS.join('|')})\b/, Keyword
110
+
111
+ # Whitespace. Classify `\n` as `Text` to avoid interference with `Comment` and `Keyword` above
112
+ rule(/[ \t]+/, Text::Whitespace)
113
+ rule(/[\n\r]+/, Text)
114
+
115
+ # In-line comment: //
116
+ rule %r/\/\/.*?$/, Comment::Single
117
+
118
+ # Multi-line comment: /* and */
119
+ rule %r(/(\\\n)?[*].*?[*](\\\n)?/)m, Comment::Multiline
120
+
121
+ # Strings indicated by compound double-quotes (`""') and double-quotes ("")
122
+ rule %r/`"(\\.|.)*?"'/, Str::Double
123
+ rule %r/"(\\.|.)*?"/, Str::Double
124
+
125
+ # Format locals (`') and globals ($) as strings
126
+ rule %r/`(\\.|.)*?'/, Str::Double
127
+ rule %r/(?<!\w)\$\w+/, Str::Double
128
+
129
+ # Display formats
130
+ rule %r/\%\S+/, Name::Property
131
+
132
+ # Additional string types: str1-str2045
133
+ rule %r/\bstr(204[0-5]|20[0-3][0-9]|[01][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[1-9])\b/, Keyword::Type
134
+
135
+ # Only recognize primitive functions when they are actually used as a function call, i.e. followed by an opening parenthesis
136
+ # `Name::Builtin` would be more logical, but is not usually highlighted, so use `Name::Function` instead
137
+ rule %r/\b(#{PRIMITIVE_FUNCTIONS.join('|')})(?=\()/, Name::Function
138
+
139
+ # Matrix operator `..` (declare here instead of with other operators, in order to avoid conflict with numbers below)
140
+ rule %r/\.\.(?=.*\])/, Operator
141
+
142
+ # Numbers
143
+ rule %r/[+-]?(\d+([.]\d+)?|[.]\d+)([eE][+-]?\d+)?/, Num
144
+
145
+ # Factor variable and time series operators
146
+ rule %r/\b[ICOicoLFDSlfds]\w*\./, Operator
147
+ rule %r/\b[ICOicoLFDSlfds]\w*(?=\(.*\)\.)/, Operator
148
+
149
+ rule %r/\w+/ do |m|
150
+ if self.class.reserved_keywords.include? m[0]
151
+ token Keyword::Reserved
152
+ elsif self.class.type_keywords.include? m[0]
153
+ token Keyword::Type
154
+ else
155
+ token Name
156
+ end
157
+ end
158
+
159
+ rule %r/[\[\]{}();,]/, Punctuation
160
+
161
+ rule %r([-<>?*+'^/\\!#.=~:&|]), Operator
162
+ end
163
+ end
164
+ end
165
+ end
@@ -113,4 +113,3 @@ module Rouge
113
113
  end
114
114
  end
115
115
  end
116
-