rouge 3.27.0 → 3.29.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +5 -1
- data/lib/rouge/demos/brainfuck +1 -1
- data/lib/rouge/demos/brightscript +1 -1
- data/lib/rouge/demos/bsl +1 -1
- data/lib/rouge/demos/cfscript +1 -1
- data/lib/rouge/demos/coq +1 -1
- data/lib/rouge/demos/csvs +1 -1
- data/lib/rouge/demos/datastudio +0 -1
- data/lib/rouge/demos/ecl +0 -1
- data/lib/rouge/demos/erlang +1 -1
- data/lib/rouge/demos/fluent +13 -0
- data/lib/rouge/demos/fsharp +1 -1
- data/lib/rouge/demos/glsl +1 -1
- data/lib/rouge/demos/haxe +0 -1
- data/lib/rouge/demos/idris +13 -0
- data/lib/rouge/demos/lean +8 -0
- data/lib/rouge/demos/nesasm +1 -1
- data/lib/rouge/demos/opentype_feature_file +0 -1
- data/lib/rouge/demos/plsql +2 -0
- data/lib/rouge/demos/smarty +0 -1
- data/lib/rouge/demos/stan +13 -0
- data/lib/rouge/demos/stata +14 -0
- data/lib/rouge/demos/syzlang +15 -0
- data/lib/rouge/demos/syzprog +8 -0
- data/lib/rouge/formatters/html_inline.rb +0 -1
- data/lib/rouge/lexer.rb +2 -2
- data/lib/rouge/lexers/apache/keywords.rb +1 -1
- data/lib/rouge/lexers/c.rb +12 -2
- data/lib/rouge/lexers/console.rb +1 -1
- data/lib/rouge/lexers/cpp.rb +6 -4
- data/lib/rouge/lexers/cypher.rb +8 -0
- data/lib/rouge/lexers/dart.rb +8 -8
- data/lib/rouge/lexers/docker.rb +4 -0
- data/lib/rouge/lexers/eiffel.rb +0 -1
- data/lib/rouge/lexers/fluent.rb +74 -0
- data/lib/rouge/lexers/gherkin/keywords.rb +1 -1
- data/lib/rouge/lexers/handlebars.rb +1 -1
- data/lib/rouge/lexers/hcl.rb +1 -0
- data/lib/rouge/lexers/hylang.rb +0 -1
- data/lib/rouge/lexers/idris.rb +210 -0
- data/lib/rouge/lexers/jsx.rb +1 -2
- data/lib/rouge/lexers/kotlin.rb +3 -1
- data/lib/rouge/lexers/lasso/keywords.rb +1 -1
- data/lib/rouge/lexers/lean.rb +164 -0
- data/lib/rouge/lexers/llvm/keywords.rb +1 -1
- data/lib/rouge/lexers/lua/keywords.rb +1 -1
- data/lib/rouge/lexers/mathematica/keywords.rb +1 -1
- data/lib/rouge/lexers/matlab/keywords.rb +1 -1
- data/lib/rouge/lexers/matlab.rb +3 -2
- data/lib/rouge/lexers/ocl.rb +0 -1
- data/lib/rouge/lexers/php/keywords.rb +1 -1
- data/lib/rouge/lexers/plsql.rb +578 -0
- data/lib/rouge/lexers/prometheus.rb +0 -1
- data/lib/rouge/lexers/python.rb +3 -1
- data/lib/rouge/lexers/q.rb +0 -1
- data/lib/rouge/lexers/rust.rb +9 -5
- data/lib/rouge/lexers/sparql.rb +5 -4
- data/lib/rouge/lexers/sqf/keywords.rb +1 -1
- data/lib/rouge/lexers/stan.rb +451 -0
- data/lib/rouge/lexers/stata.rb +165 -0
- data/lib/rouge/lexers/supercollider.rb +0 -1
- data/lib/rouge/lexers/syzlang.rb +317 -0
- data/lib/rouge/lexers/syzprog.rb +122 -0
- data/lib/rouge/lexers/tap.rb +0 -1
- data/lib/rouge/lexers/toml.rb +8 -6
- data/lib/rouge/lexers/tsx.rb +0 -1
- data/lib/rouge/lexers/tulip.rb +0 -1
- data/lib/rouge/lexers/viml/keywords.rb +1 -1
- data/lib/rouge/version.rb +1 -1
- metadata +19 -3
data/lib/rouge/lexers/rust.rb
CHANGED
@@ -22,10 +22,13 @@ module Rouge
|
|
22
22
|
|
23
23
|
def self.keywords
|
24
24
|
@keywords ||= %w(
|
25
|
-
as
|
26
|
-
|
27
|
-
|
28
|
-
|
25
|
+
as async await break const continue crate dyn else enum extern false
|
26
|
+
fn for if impl in let log loop match mod move mut pub ref return self
|
27
|
+
Self static struct super trait true type unsafe use where while
|
28
|
+
abstract become box do final macro
|
29
|
+
override priv typeof unsized virtual
|
30
|
+
yield try
|
31
|
+
union
|
29
32
|
)
|
30
33
|
end
|
31
34
|
|
@@ -212,7 +215,8 @@ module Rouge
|
|
212
215
|
|
213
216
|
state :has_literals do
|
214
217
|
# constants
|
215
|
-
rule %r/\b(?:true|false
|
218
|
+
rule %r/\b(?:true|false)\b/, Keyword::Constant
|
219
|
+
|
216
220
|
# characters/bytes
|
217
221
|
rule %r(
|
218
222
|
b?' (?: #{escapes} | [^\\] ) '
|
data/lib/rouge/lexers/sparql.rb
CHANGED
@@ -41,16 +41,17 @@ module Rouge
|
|
41
41
|
rule %r('''), Str::Single, :string_single_literal
|
42
42
|
rule %r('), Str::Single, :string_single
|
43
43
|
|
44
|
-
rule %r([$?]
|
45
|
-
rule %r((
|
44
|
+
rule %r([$?][[:word:]]+), Name::Variable
|
45
|
+
rule %r(([[:word:]-]*)(:)([[:word:]-]+)?) do |m|
|
46
46
|
token Name::Namespace, m[1]
|
47
|
-
token
|
47
|
+
token Operator, m[2]
|
48
|
+
token Str::Symbol, m[3]
|
48
49
|
end
|
49
50
|
rule %r(<[^>]*>), Name::Namespace
|
50
51
|
rule %r(true|false)i, Keyword::Constant
|
51
52
|
rule %r/a\b/, Keyword
|
52
53
|
|
53
|
-
rule %r([A-Z]
|
54
|
+
rule %r([A-Z][[:word:]]+\b)i do |m|
|
54
55
|
if self.class.builtins.include? m[0].upcase
|
55
56
|
token Name::Builtin
|
56
57
|
elsif self.class.keywords.include? m[0].upcase
|
@@ -0,0 +1,451 @@
|
|
1
|
+
# -*- coding: utf-8 -*- #
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Rouge
|
5
|
+
module Lexers
|
6
|
+
class Stan < RegexLexer
|
7
|
+
title "Stan"
|
8
|
+
desc 'Stan Modeling Language (mc-stan.org)'
|
9
|
+
tag 'stan'
|
10
|
+
filenames '*.stan', '*.stanfunctions'
|
11
|
+
|
12
|
+
# One or more whitespace characters, `// ...` line comments (up to and
# including the newline) or `/* ... */` comments. No /m flag is set, so `.`
# does not cross a newline inside this pattern.
WS = %r((?:\s|//.*?\n|/[*].*?[*]/)+)

# Identifier: a letter or underscore followed by letters, digits, underscores.
ID = /[a-zA-Z_][a-zA-Z0-9_]*/

# Optional prefix captured in front of a function name by the :scope state.
# NOTE(review): each repetition accepts a single `[a-z_]` character (and a
# single character inside `[...]`), so multi-letter type names never match
# here and the prefix is usually empty -- confirm whether `+` was intended.
RT = /(?:(?:[a-z_]\s*(?:\[[0-9, ]\])?)\s+)*/
|
16
|
+
# Regexp matching one Stan operator.
#
# Regexp alternation is ordered: the first alternative that matches wins.
# The operators are therefore sorted longest-first, so that e.g. "<=" is
# matched as a single operator instead of "<" followed by "=".
# Regexp.union escapes every string, so the operators are listed verbatim.
OP = Regexp.union(
  [
    # Assignment operators
    "=",

    # Comparison operators
    "<", "<=", ">", ">=", "==", "!=",

    # Boolean operators
    "!", "&&", "||",

    # Real-valued arithmetic operators
    "+", "-", "*", "/", "^",

    # Transposition operator
    "'",

    # Elementwise functions
    ".+", ".-", ".*", "./", ".^",

    # Matrix division operators
    "\\",

    # Compound assignment operators
    "+=", "-=", "*=", "/=", ".*=", "./=",

    # Sampling
    "~",

    # Conditional operator
    "?", ":"
  ].sort_by { |op| -op.length }
)
|
47
|
+
|
48
|
+
# Statement and control-flow keywords, built once and memoized as a Set.
def self.keywords
  @keywords ||= Set.new(
    %w(if else while for break continue print reject return)
  )
end
|
53
|
+
|
54
|
+
# Type names (and the `data` / `array` qualifiers), memoized as a Set.
def self.types
  @types ||= begin
    scalar_and_vector = %w(
      int real vector ordered positive_ordered simplex unit_vector row_vector
    )
    matrix_like = %w(
      matrix cholesky_factor_corr cholesky_factor_cov corr_matrix cov_matrix
    )
    other = %w(data void complex array)

    Set.new(scalar_and_vector + matrix_like + other)
  end
end
|
61
|
+
|
62
|
+
# Words that may not be used as identifiers, memoized as a Set. The three
# groups overlap (e.g. "for", "if", "true"); the Set collapses the repeats.
def self.reserved
  @reserved ||= begin
    # Reserved words from Stan language
    stan_language = %w(
      for in while repeat until if then else true
      false target functions model data parameters
      quantities transformed generated
    )

    # Reserved names from Stan implementation
    stan_implementation = %w(
      var fvar STAN_MAJOR STAN_MINOR STAN_PATCH
      STAN_MATH_MAJOR STAN_MATH_MINOR STAN_MATH_PATCH
    )

    # Reserved names from C++
    cpp = %w(
      alignas alignof and and_eq asm auto bitand
      bitor bool break case catch char char16_t
      char32_t class compl const constexpr const_cast
      continue decltype default delete do double
      dynamic_cast else enum explicit export extern
      false float for friend goto if inline int
      long mutable namespace new noexcept not not_eq
      nullptr operator or or_eq private protected
      public register reinterpret_cast return short signed
      sizeof static static_assert static_cast struct
      switch template this thread_local throw true try
      typedef typeid typename union unsigned using
      virtual void volatile wchar_t while xor xor_eq
    )

    Set.new(stan_language + stan_implementation + cpp)
  end
end
|
89
|
+
|
90
|
+
# Names of Stan's built-in functions, memoized as a Set. The :scope state
# consults this set to emit a called name as Name::Builtin.
def self.builtin_functions
  @builtin_functions ||= Set.new [
    # Integer-Valued Basic Functions

    ## Absolute functions
    "abs", "int_step",

    ## Bound functions
    "min", "max",

    ## Size functions
    "size",

    # Real-Valued Basic Functions

    ## Log probability function
    "target", "get_lp",

    ## Logical functions
    "step", "is_inf", "is_nan",

    ## Step-like functions
    "fabs", "fdim", "fmin", "fmax", "fmod", "floor", "ceil", "round",
    "trunc",

    ## Power and logarithm functions
    "sqrt", "cbrt", "square", "exp", "exp2", "log", "log2", "log10",
    "pow", "inv", "inv_sqrt", "inv_square",

    ## Trigonometric functions
    "hypot", "cos", "sin", "tan", "acos", "asin", "atan", "atan2",

    ## Hyperbolic trigonometric functions
    "cosh", "sinh", "tanh", "acosh", "asinh", "atanh",

    ## Link functions
    "logit", "inv_logit", "inv_cloglog",

    ## Probability-related functions
    "erf", "erfc", "Phi", "inv_Phi", "Phi_approx", "binary_log_loss",
    "owens_t",

    ## Combinatorial functions
    "beta", "inc_beta", "lbeta", "tgamma", "lgamma", "digamma",
    "trigamma", "lmgamma", "gamma_p", "gamma_q",
    "binomial_coefficient_log", "choose", "bessel_first_kind",
    "bessel_second_kind", "modified_bessel_first_kind",
    "log_modified_bessel_first_kind", "modified_bessel_second_kind",
    "falling_factorial", "lchoose", "log_falling_factorial",
    "rising_factorial", "log_rising_factorial",

    ## Composed functions
    "expm1", "fma", "multiply_log", "ldexp", "lmultiply", "log1p",
    "log1m", "log1p_exp", "log1m_exp", "log_diff_exp", "log_mix",
    "log_sum_exp", "log_inv_logit", "log_inv_logit_diff",
    "log1m_inv_logit",

    ## Special functions
    "lambert_w0", "lambert_wm1",

    # Complex-Valued Basic Functions

    ## Complex constructors and accessors
    "to_complex", "get_real", "get_imag",

    ## Complex special functions
    "arg", "norm", "conj", "proj", "polar",

    # Array Operations

    ## Reductions ("log_sum_exp" is already listed under composed functions)
    "sum", "prod", "mean", "variance", "sd", "distance",
    "squared_distance", "quantile",

    ## Array size and dimension function
    "dims", "num_elements",

    ## Array broadcasting
    "rep_array",

    ## Array concatenation
    "append_array",

    ## Sorting functions
    "sort_asc", "sort_desc", "sort_indices_asc", "sort_indices_desc",
    "rank",

    ## Reversing functions
    "reverse",

    # Matrix Operations

    ## Integer-valued matrix size functions
    ## ("num_elements" is already listed under array operations)
    "rows", "cols",

    ## Dot products and specialized products
    "dot_product", "columns_dot_product", "rows_dot_product", "dot_self",
    "columns_dot_self", "rows_dot_self", "tcrossprod", "crossprod",
    "quad_form", "quad_form_diag", "quad_form_sym", "trace_quad_form",
    "trace_gen_quad_form", "multiply_lower_tri_self_transpose",
    "diag_pre_multiply", "diag_post_multiply",

    ## Broadcast functions
    "rep_vector", "rep_row_vector", "rep_matrix",
    "symmetrize_from_lower_tri",

    ## Diagonal matrix functions
    "add_diag", "diagonal", "diag_matrix", "identity_matrix",

    ## Container construction functions
    "linspaced_array", "linspaced_int_array", "linspaced_vector",
    "linspaced_row_vector", "one_hot_int_array", "one_hot_array",
    "one_hot_vector", "one_hot_row_vector", "ones_int_array",
    "ones_array", "ones_vector", "ones_row_vector", "zeros_int_array",
    "zeros_array", "zeros_vector", "zeros_row_vector", "uniform_simplex",

    ## Slicing and blocking functions
    "col", "row", "block", "sub_col", "sub_row", "head", "tail",
    "segment",

    ## Matrix concatenation
    "append_col", "append_row",

    ## Special matrix functions
    "softmax", "log_softmax", "cumulative_sum",

    ## Covariance functions
    "cov_exp_quad",

    ## Linear algebra functions and solvers
    "mdivide_left_tri_low", "mdivide_right_tri_low", "mdivide_left_spd",
    "mdivide_right_spd", "matrix_exp", "matrix_exp_multiply",
    "scale_matrix_exp_multiply", "matrix_power", "trace", "determinant",
    "log_determinant", "inverse", "inverse_spd", "chol2inv",
    "generalized_inverse", "eigenvalues_sym", "eigenvectors_sym",
    "qr_thin_Q", "qr_thin_R", "qr_Q", "qr_R", "cholesky_decompose",
    "singular_values", "svd_U", "svd_V",

    # Sparse Matrix Operations

    ## Conversion functions
    "csr_extract_w", "csr_extract_v", "csr_extract_u",
    "csr_to_dense_matrix",

    ## Sparse matrix arithmetic
    "csr_matrix_times_vector",

    # Mixed Operations
    "to_matrix", "to_vector", "to_row_vector", "to_array_2d",
    "to_array_1d",

    # Higher-Order Functions

    ## Algebraic equation solver
    "algebra_solver", "algebra_solver_newton",

    ## Ordinary differential equation
    "ode_rk45", "ode_rk45_tol", "ode_ckrk", "ode_ckrk_tol", "ode_adams",
    "ode_adams_tol", "ode_bdf", "ode_bdf_tol", "ode_adjoint_tol_ctl",

    ## 1D integrator
    "integrate_1d",

    ## Reduce-sum function
    "reduce_sum", "reduce_sum_static",

    ## Map-rect function
    "map_rect",

    # Deprecated Functions
    "integrate_ode_rk45", "integrate_ode", "integrate_ode_adams",
    "integrate_ode_bdf",

    # Hidden Markov Models
    "hmm_marginal", "hmm_latent_rng", "hmm_hidden_state_prob"
  ]
end
|
267
|
+
|
268
|
+
# Every distribution name combined with every supported suffix (including
# the bare name itself), memoized as a Set.
def self.distributions
  @distributions ||= begin
    suffixes = [
      "", "_lpmf", "_lupmf", "_lpdf", "_lcdf", "_lccdf", "_rng", "_log",
      "_cdf_log", "_ccdf_log"
    ]

    names = [
      # Discrete Distributions

      ## Binary Distributions
      "bernoulli", "bernoulli_logit", "bernoulli_logit_glm",

      ## Bounded Discrete Distributions
      "binomial", "binomial_logit", "beta_binomial", "hypergeometric",
      "categorical", "categorical_logit_glm", "discrete_range",
      "ordered_logistic", "ordered_logistic_glm", "ordered_probit",

      ## Unbounded Discrete Distributions
      "neg_binomial", "neg_binomial_2", "neg_binomial_2_log",
      "neg_binomial_2_log_glm", "poisson", "poisson_log",
      "poisson_log_glm",

      ## Multivariate Discrete Distributions
      "multinomial", "multinomial_logit",

      # Continuous Distributions

      ## Unbounded Continuous Distributions
      "normal", "std_normal", "normal_id_glm", "exp_mod_normal",
      "skew_normal", "student_t", "cauchy", "double_exponential",
      "logistic", "gumbel", "skew_double_exponential",

      ## Positive Continuous Distributions
      "lognormal", "chi_square", "inv_chi_square",
      "scaled_inv_chi_square", "exponential", "gamma", "inv_gamma",
      "weibull", "frechet", "rayleigh",

      ## Positive Lower-Bounded Distributions
      "pareto", "pareto_type_2", "wiener",

      ## Continuous Distributions on [0, 1]
      "beta", "beta_proportion",

      ## Circular Distributions
      "von_mises",

      ## Bounded Continuous Distributions
      "uniform",

      ## Distributions over Unbounded Vectors
      "multi_normal", "multi_normal_prec", "multi_normal_cholesky",
      "multi_gp", "multi_gp_cholesky", "multi_student_t",
      "gaussian_dlm_obs",

      ## Simplex Distributions
      "dirichlet",

      ## Correlation Matrix Distributions
      "lkj_corr", "lkj_corr_cholesky",

      ## Covariance Matrix Distributions
      "wishart", "inv_wishart"
    ]

    combined = names.flat_map do |dist|
      suffixes.map { |suffix| "#{dist}#{suffix}" }
    end

    Set.new(combined)
  end
end
|
332
|
+
|
333
|
+
# Names of mathematical constants and special values, memoized as a Set.
def self.constants
  @constants ||= begin
    # Mathematical constants
    mathematical = %w(pi e sqrt2 log2 log10)

    # Special values
    special = %w(
      not_a_number positive_infinity negative_infinity machine_precision
    )

    Set.new(mathematical + special)
  end
end
|
343
|
+
|
344
|
+
# Top-level state: program block headers plus everything the :scope state
# accepts. Rules are tried in the order they are declared.
state :root do
  mixin :whitespace

  # `#include`; the file name that follows is lexed by the :include state
  rule %r/#include/, Comment::Preproc, :include

  # any other `#` text up to the end of the line
  rule %r/#.*$/, Generic::Deleted

  # program block names
  rule %r(
    functions
    |(?:transformed\s+)?data
    |(?:transformed\s+)?parameters
    |model
    |generated\s+quantities
  )x, Name::Namespace

  # opening brace of a block body
  rule %r(\{), Punctuation, :bracket_scope

  mixin :scope
end
|
358
|
+
|
359
|
+
# Entered right after `#include`: emits the whitespace, the file name and
# any trailing whitespace, then returns to the previous state.
state :include do
  rule %r((\s+)(\S+)(\s*)) do |match|
    leading, path, trailing = match[1], match[2], match[3]

    token Text, leading
    token Comment::PreprocFile, path
    token Text, trailing
    pop!
  end
end
|
367
|
+
|
368
|
+
# Newlines and `//` line comments, plus everything in :inline_whitespace.
state :whitespace do
  # one or more newlines
  rule %r(\n+)m, Text

  # `//` comment running to the end of the line
  rule %r(//(\\.|.)*?$), Comment::Single

  mixin :inline_whitespace
end
|
373
|
+
|
374
|
+
# Spaces, tabs, carriage returns and `/* ... */` comments (the /m flag lets
# a comment span several lines).
state :inline_whitespace do
  rule %r([ \t\r]+), Text
  rule %r(/(\\\n)?[*].*?[*](\\\n)?/)m, Comment::Multiline
end
|
378
|
+
|
379
|
+
# Literals, operators, punctuation and identifiers. Rules are tried in the
# order they are declared, so the float/complex rule must stay ahead of the
# plain integer rule and `*/` ahead of the operator rule.
state :statements do
  mixin :whitespace

  # `#include` and other `#` lines, as in :root
  rule %r/#include/, Comment::Preproc, :include
  rule %r/#.*$/, Generic::Deleted

  # opening quote; the body is lexed by the :string state
  rule %r("), Str, :string

  # complex literals (`a + bi`, `bi`) and real literals with a fraction
  # and/or an exponent
  rule %r(
    (
      ((\d+[.]\d*|[.]?\d+)e[+-]?\d+|\d*[.]\d+|\d+)
      (#{WS})[+-](#{WS})
      ((\d+[.]\d*|[.]?\d+)e[+-]?\d+|\d*[.]\d+|\d+)i
    )
    |((\d+[.]\d*|[.]?\d+)e[+-]?\d+|\d*[.]\d+|\d+)i
    |((\d+[.]\d*|[.]?\d+)e[+-]?\d+|\d*[.]\d+)
  )mx, Num::Float
  rule %r/\d+/, Num::Integer

  # a comment terminator with no matching opener
  rule %r(\*/), Error

  rule OP, Operator
  rule %r([\[\],.;]), Punctuation

  # a single `|` that is not part of `||`
  rule %r([|](?![|])), Punctuation

  rule %r(T\b), Keyword::Reserved
  rule %r((lower|upper)\b), Name::Attribute

  # identifiers, classified by the keyword / type / reserved-word sets
  rule ID do |m|
    word = m[0]
    lexer_class = self.class

    kind =
      if lexer_class.keywords.include?(word)
        Keyword
      elsif lexer_class.types.include?(word)
        Keyword::Type
      elsif lexer_class.reserved.include?(word)
        Keyword::Reserved
      else
        Name::Variable
      end

    token kind
  end
end
|
414
|
+
|
415
|
+
# Content allowed inside a block or parenthesised expression: function
# names (declarations and calls), nested `{}` / `()` scopes, then everything
# in :statements.
state :scope do
  mixin :whitespace

  # An identifier that is followed by a parenthesised argument list.
  rule %r(
    (#{RT})         # Return type
    (#{ID})         # Function name
    (?=\([^;]*?\))  # Signature or arguments
  )mx do |m|
    # The prefix in front of the name is lexed on its own.
    recurse m[1]

    # Every branch passes `name` explicitly: `token` without a value would
    # emit the whole match, repeating the prefix already handled by
    # `recurse` above.
    name = m[2]
    if self.class.builtin_functions.include? name
      token Name::Builtin, name
    elsif self.class.distributions.include? name
      token Name::Builtin, name
    elsif self.class.constants.include? name
      token Keyword::Constant, name
    else
      token Name::Function, name
    end
  end

  rule %r(\{), Punctuation, :bracket_scope
  rule %r(\(), Punctuation, :parens_scope
  mixin :statements
end
|
439
|
+
|
440
|
+
# Body of a `{ ... }` block: same content as :scope; the closing brace
# returns to the enclosing state.
state :bracket_scope do
  mixin :scope

  rule %r(\}), Punctuation, :pop!
end
|
444
|
+
|
445
|
+
# Inside `( ... )`: same content as :scope; the closing parenthesis returns
# to the enclosing state.
state :parens_scope do
  mixin :scope

  rule %r(\)), Punctuation, :pop!
end
|
449
|
+
end
|
450
|
+
end
|
451
|
+
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
# -*- coding: utf-8 -*- #
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Rouge
|
5
|
+
module Lexers
|
6
|
+
class Stata < RegexLexer
|
7
|
+
title "Stata"
|
8
|
+
desc "The Stata programming language (www.stata.com)"
|
9
|
+
tag 'stata'
|
10
|
+
filenames '*.do', '*.ado'
|
11
|
+
mimetypes 'application/x-stata', 'text/x-stata'
|
12
|
+
|
13
|
+
###
|
14
|
+
# Stata reference manual is available online at: https://www.stata.com/features/documentation/
|
15
|
+
###
|
16
|
+
|
17
|
+
# Partial list of common programming and estimation commands, as of Stata 16
# Note: not all abbreviations are included
#
# The words are joined into a regexp alternation by the :root state. Several
# commands appear in more than one group below, so the list is de-duplicated
# (first occurrence wins) and frozen.
KEYWORDS = %w(
  do run include clear assert set mata log
  by bys bysort cap capt capture char class classutil which cdir confirm new existence creturn
  _datasignature discard di dis disp displ displa display ereturn error _estimates exit file open read write seek close query findfile fvexpand
  gettoken java home heapmax java_heapmax icd9 icd9p icd10 icd10cm icd10pcs initialize javacall levelsof
  tempvar tempname tempfile macro shift uniq dups retokenize clean sizeof posof
  makecns matcproc marksample mark markout markin svymarkout matlist
  accum define dissimilarity eigenvalues get rowjoinbyname rownames score svd symeigen dir list ren rename
  more pause plugin call postfile _predict preserve restore program define drop end python qui quietly noi noisily _return return _rmcoll rmsg _robust
  serset locale_functions locale_ui signestimationsample checkestimationsample sleep syntax sysdir adopath adosize
  tabdisp timer tokenize trace unab unabcmd varabbrev version viewsource
  window fopen fsave manage menu push stopbox
  net from cd link search install sj stb ado update uninstall pwd ssc ls
  using insheet outsheet mkmat svmat sum summ summarize
  graph gr_edit twoway histogram kdensity spikeplot
  mi miss missing var varname order compress append
  gen gene gener genera generat generate egen replace duplicates
  estimates nlcom lincom test testnl predict suest
  _regress reg regr regre regres regress probit logit ivregress logistic svy gmm ivprobit ivtobit
  bsample assert codebook collapse compare contract copy count cross datasignature d ds desc describe destring tostring
  drawnorm edit encode decode erase expand export filefilter fillin format frame frget frlink gsort
  import dbase delimited excel fred haver sas sasxport5 sasxport8 spss infile infix input insobs inspect ipolate isid
  joinby label language labelbook lookfor memory mem merge mkdir mvencode notes obs odbc order outfile
  pctile xtile _pctile putmata range recast recode rename group reshape rm rmdir sample save saveold separate shell snapshot sort split splitsample stack statsby sysuse
  type unicode use varmanage vl webuse xpose zipfile
  number keep tab table tabulate stset stcox tsset xtset
).uniq.freeze
|
46
|
+
|
47
|
+
# Complete list of functions by name, as of Stata 16
#
# The names are joined into a regexp alternation by the :root state, which
# only matches them when they are directly followed by `(`. The list is
# de-duplicated and frozen.
PRIMITIVE_FUNCTIONS = %w(
  abbrev abs acos acosh age age_frac asin asinh atan atan2 atanh autocode
  betaden binomial binomialp binomialtail binormal birthday bofd byteorder
  c _caller cauchy cauchyden cauchytail Cdhms ceil char chi2 chi2den chi2tail Chms
  chop cholesky clip Clock clock clockdiff cloglog Cmdyhms Cofc cofC Cofd cofd coleqnumb
  collatorlocale collatorversion colnfreeparms colnumb colsof comb cond corr cos cosh
  daily date datediff datediff_frac day det dgammapda dgammapdada dgammapdadx dgammapdxdx dhms
  diag diag0cnt digamma dofb dofC dofc dofh dofm dofq dofw dofy dow doy dunnettprob e el epsdouble
  epsfloat exp expm1 exponential exponentialden exponentialtail
  F Fden fileexists fileread filereaderror filewrite float floor fmtwidth frval _frval Ftail
  gammaden gammap gammaptail get hadamard halfyear halfyearly has_eprop hh hhC hms hofd hours
  hypergeometric hypergeometricp
  I ibeta ibetatail igaussian igaussianden igaussiantail indexnot inlist inrange int inv invbinomial invbinomialtail
  invcauchy invcauchytail invchi2 invchi2tail invcloglog invdunnettprob invexponential invexponentialtail invF
  invFtail invgammap invgammaptail invibeta invibetatail invigaussian invigaussiantail invlaplace invlaplacetail
  invlogistic invlogistictail invlogit invnbinomial invnbinomialtail invnchi2 invnchi2tail invnF invnFtail invnibeta invnormal invnt invnttail
  invpoisson invpoissontail invsym invt invttail invtukeyprob invweibull invweibullph invweibullphtail invweibulltail irecode isleapyear issymmetric
  J laplace laplaceden laplacetail ln ln1m ln1p lncauchyden lnfactorial lngamma lnigammaden lnigaussianden lniwishartden lnlaplaceden lnmvnormalden
  lnnormal lnnormalden lnwishartden log log10 log1m log1p logistic logisticden logistictail logit
  matmissing matrix matuniform max maxbyte maxdouble maxfloat maxint maxlong mdy mdyhms mi min minbyte mindouble minfloat minint minlong minutes
  missing mm mmC mod mofd month monthly mreldif msofhours msofminutes msofseconds
  nbetaden nbinomial nbinomialp nbinomialtail nchi2 nchi2den nchi2tail nextbirthday nextleapyear nF nFden nFtail nibeta
  normal normalden npnchi2 npnF npnt nt ntden nttail nullmat
  plural poisson poissonp poissontail previousbirthday previousleapyear qofd quarter quarterly r rbeta rbinomial rcauchy rchi2 recode
  real regexm regexr regexs reldif replay return rexponential rgamma rhypergeometric rigaussian rlaplace rlogistic rnormal
  round roweqnumb rownfreeparms rownumb rowsof rpoisson rt runiform runiformint rweibull rweibullph
  s scalar seconds sign sin sinh smallestdouble soundex soundex_nara sqrt ss ssC strcat strdup string stritrim strlen strlower
  strltrim strmatch strofreal strpos strproper strreverse strrpos strrtrim strtoname strtrim strupper subinstr subinword substr sum sweep
  t tan tanh tC tc td tden th tin tm tobytes tq trace trigamma trunc ttail tukeyprob tw twithin
  uchar udstrlen udsubstr uisdigit uisletter uniform ustrcompare ustrcompareex ustrfix ustrfrom ustrinvalidcnt ustrleft ustrlen ustrlower
  ustrltrim ustrnormalize ustrpos ustrregexm ustrregexra ustrregexrf ustrregexs ustrreverse ustrright ustrrpos ustrrtrim ustrsortkey
  ustrsortkeyex ustrtitle ustrto ustrtohex ustrtoname ustrtrim ustrunescape ustrupper ustrword ustrwordcount usubinstr usubstr
  vec vecdiag week weekly weibull weibullden weibullph weibullphden weibullphtail weibulltail wofd word wordbreaklocale wordcount
  year yearly yh ym yofd yq yw
).uniq.freeze
|
83
|
+
|
84
|
+
# Storage types and type-like words, memoized as a Set.
# Note: types `str1-str2045` handled separately below
def self.type_keywords
  @type_keywords ||= begin
    storage = %w(byte int long float double str strL)
    kinds = %w(numeric string integer scalar matrix local global)
    lists = %w(numlist varlist newlist)

    Set.new(storage + kinds + lists)
  end
end
|
88
|
+
|
89
|
+
# Stata commands used with braces. Includes all valid abbreviations for 'forvalues'.
def self.reserved_keywords
  @reserved_keywords ||= begin
    # "forv", "forva", ... up to the full "forvalues"
    forvalues = "forvalues"
    forvalues_forms = (4..forvalues.length).map { |len| forvalues[0, len] }

    Set.new(
      %w(if else foreach) + forvalues_forms +
        %w(to while in of continue break nobreak)
    )
  end
end
|
93
|
+
|
94
|
+
###
|
95
|
+
# Lexer state and rules
|
96
|
+
###
|
97
|
+
# The lexer's only state. Rules are tried in the order they are declared,
# so the line-anchored comment and keyword rules come before the generic
# whitespace, number, word and operator rules.
state :root do
  # Pre-processor commands: #
  rule %r/^\s*#.*$/, Comment::Preproc

  # Hashbang comments: *!
  rule %r/^\*!.*$/, Comment::Hashbang

  # Single-line comment: *
  rule %r/^\s*\*.*$/, Comment::Single

  # Keywords: recognize only when they are the first word
  rule %r/^\s*(#{KEYWORDS.join('|')})\b/, Keyword

  # Whitespace. Classify `\n` as `Text` to avoid interference with `Comment` and `Keyword` above
  rule(/[ \t]+/, Text::Whitespace)
  rule(/[\n\r]+/, Text)

  # In-line comment: //
  rule %r/\/\/.*?$/, Comment::Single

  # Multi-line comment: /* and */
  rule %r(/(\\\n)?[*].*?[*](\\\n)?/)m, Comment::Multiline

  # Strings indicated by compound double-quotes (`""') and double-quotes ("")
  rule %r/`"(\\.|.)*?"'/, Str::Double
  rule %r/"(\\.|.)*?"/, Str::Double

  # Format locals (`') and globals ($) as strings
  rule %r/`(\\.|.)*?'/, Str::Double
  rule %r/(?<!\w)\$\w+/, Str::Double

  # Display formats
  rule %r/\%\S+/, Name::Property

  # Additional string types: str1-str2045
  rule %r/\bstr(204[0-5]|20[0-3][0-9]|[01][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[1-9])\b/, Keyword::Type

  # Only recognize primitive functions when they are actually used as a function call, i.e. followed by an opening parenthesis
  # `Name::Builtin` would be more logical, but is not usually highlighted, so use `Name::Function` instead
  rule %r/\b(#{PRIMITIVE_FUNCTIONS.join('|')})(?=\()/, Name::Function

  # Matrix operator `..` (declare here instead of with other operators, in order to avoid conflict with numbers below)
  rule %r/\.\.(?=.*\])/, Operator

  # Numbers
  rule %r/[+-]?(\d+([.]\d+)?|[.]\d+)([eE][+-]?\d+)?/, Num

  # Factor variable and time series operators
  rule %r/\b[ICOicoLFDSlfds]\w*\./, Operator
  rule %r/\b[ICOicoLFDSlfds]\w*(?=\(.*\)\.)/, Operator

  # Any other word: reserved keyword, type keyword, or a plain name
  rule %r/\w+/ do |m|
    word = m[0]
    lexer_class = self.class

    kind =
      if lexer_class.reserved_keywords.include?(word)
        Keyword::Reserved
      elsif lexer_class.type_keywords.include?(word)
        Keyword::Type
      else
        Name
      end

    token kind
  end

  rule %r/[\[\]{}();,]/, Punctuation

  rule %r([-<>?*+'^/\\!#.=~:&|]), Operator
end
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|