github-linguist 5.3.1 → 5.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/linguist/extconf.rb +3 -0
- data/ext/linguist/lex.linguist_yy.c +8269 -0
- data/ext/linguist/lex.linguist_yy.h +353 -0
- data/ext/linguist/linguist.c +64 -0
- data/ext/linguist/linguist.h +11 -0
- data/ext/linguist/tokenizer.l +119 -0
- data/grammars/source.coffee.json +123 -41
- data/grammars/source.crystal.json +2 -2
- data/grammars/source.css.less.json +319 -27
- data/grammars/source.glsl.json +1 -1
- data/grammars/source.js.json +6 -2
- data/grammars/source.meson.json +1 -1
- data/grammars/source.tsx.json +4 -14
- data/grammars/source.wdl.json +2 -2
- data/grammars/text.roff.json +155 -41
- data/grammars/text.shell-session.json +1 -1
- data/lib/linguist/blob_helper.rb +47 -4
- data/lib/linguist/classifier.rb +3 -1
- data/lib/linguist/file_blob.rb +3 -3
- data/lib/linguist/heuristics.rb +15 -6
- data/lib/linguist/linguist.bundle +0 -0
- data/lib/linguist/samples.json +49989 -44225
- data/lib/linguist/strategy/modeline.rb +2 -2
- data/lib/linguist/tokenizer.rb +1 -186
- data/lib/linguist/version.rb +1 -1
- metadata +25 -3
@@ -0,0 +1,353 @@
|
|
1
|
+
#ifndef linguist_yyHEADER_H
|
2
|
+
#define linguist_yyHEADER_H 1
|
3
|
+
#define linguist_yyIN_HEADER 1
|
4
|
+
|
5
|
+
#line 6 "lex.linguist_yy.h"
|
6
|
+
|
7
|
+
#define YY_INT_ALIGNED short int
|
8
|
+
|
9
|
+
/* A lexical scanner generated by flex */
|
10
|
+
|
11
|
+
#define FLEX_SCANNER
|
12
|
+
#define YY_FLEX_MAJOR_VERSION 2
|
13
|
+
#define YY_FLEX_MINOR_VERSION 5
|
14
|
+
#define YY_FLEX_SUBMINOR_VERSION 39
|
15
|
+
#if YY_FLEX_SUBMINOR_VERSION > 0
|
16
|
+
#define FLEX_BETA
|
17
|
+
#endif
|
18
|
+
|
19
|
+
/* First, we deal with platform-specific or compiler-specific issues. */
|
20
|
+
|
21
|
+
/* begin standard C headers. */
|
22
|
+
#include <stdio.h>
|
23
|
+
#include <string.h>
|
24
|
+
#include <errno.h>
|
25
|
+
#include <stdlib.h>
|
26
|
+
|
27
|
+
/* end standard C headers. */
|
28
|
+
|
29
|
+
/* flex integer type definitions */
|
30
|
+
|
31
|
+
#ifndef FLEXINT_H
|
32
|
+
#define FLEXINT_H
|
33
|
+
|
34
|
+
/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
|
35
|
+
|
36
|
+
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
|
37
|
+
|
38
|
+
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
|
39
|
+
* if you want the limit (max/min) macros for int types.
|
40
|
+
*/
|
41
|
+
#ifndef __STDC_LIMIT_MACROS
|
42
|
+
#define __STDC_LIMIT_MACROS 1
|
43
|
+
#endif
|
44
|
+
|
45
|
+
#include <inttypes.h>
|
46
|
+
typedef int8_t flex_int8_t;
|
47
|
+
typedef uint8_t flex_uint8_t;
|
48
|
+
typedef int16_t flex_int16_t;
|
49
|
+
typedef uint16_t flex_uint16_t;
|
50
|
+
typedef int32_t flex_int32_t;
|
51
|
+
typedef uint32_t flex_uint32_t;
|
52
|
+
#else
|
53
|
+
typedef signed char flex_int8_t;
|
54
|
+
typedef short int flex_int16_t;
|
55
|
+
typedef int flex_int32_t;
|
56
|
+
typedef unsigned char flex_uint8_t;
|
57
|
+
typedef unsigned short int flex_uint16_t;
|
58
|
+
typedef unsigned int flex_uint32_t;
|
59
|
+
|
60
|
+
/* Limits of integral types. */
|
61
|
+
#ifndef INT8_MIN
|
62
|
+
#define INT8_MIN (-128)
|
63
|
+
#endif
|
64
|
+
#ifndef INT16_MIN
|
65
|
+
#define INT16_MIN (-32767-1)
|
66
|
+
#endif
|
67
|
+
#ifndef INT32_MIN
|
68
|
+
#define INT32_MIN (-2147483647-1)
|
69
|
+
#endif
|
70
|
+
#ifndef INT8_MAX
|
71
|
+
#define INT8_MAX (127)
|
72
|
+
#endif
|
73
|
+
#ifndef INT16_MAX
|
74
|
+
#define INT16_MAX (32767)
|
75
|
+
#endif
|
76
|
+
#ifndef INT32_MAX
|
77
|
+
#define INT32_MAX (2147483647)
|
78
|
+
#endif
|
79
|
+
#ifndef UINT8_MAX
|
80
|
+
#define UINT8_MAX (255U)
|
81
|
+
#endif
|
82
|
+
#ifndef UINT16_MAX
|
83
|
+
#define UINT16_MAX (65535U)
|
84
|
+
#endif
|
85
|
+
#ifndef UINT32_MAX
|
86
|
+
#define UINT32_MAX (4294967295U)
|
87
|
+
#endif
|
88
|
+
|
89
|
+
#endif /* ! C99 */
|
90
|
+
|
91
|
+
#endif /* ! FLEXINT_H */
|
92
|
+
|
93
|
+
#ifdef __cplusplus
|
94
|
+
|
95
|
+
/* The "const" storage-class-modifier is valid. */
|
96
|
+
#define YY_USE_CONST
|
97
|
+
|
98
|
+
#else /* ! __cplusplus */
|
99
|
+
|
100
|
+
/* C99 requires __STDC__ to be defined as 1. */
|
101
|
+
#if defined (__STDC__)
|
102
|
+
|
103
|
+
#define YY_USE_CONST
|
104
|
+
|
105
|
+
#endif /* defined (__STDC__) */
|
106
|
+
#endif /* ! __cplusplus */
|
107
|
+
|
108
|
+
#ifdef YY_USE_CONST
|
109
|
+
#define yyconst const
|
110
|
+
#else
|
111
|
+
#define yyconst
|
112
|
+
#endif
|
113
|
+
|
114
|
+
/* An opaque pointer. */
|
115
|
+
#ifndef YY_TYPEDEF_YY_SCANNER_T
|
116
|
+
#define YY_TYPEDEF_YY_SCANNER_T
|
117
|
+
typedef void* yyscan_t;
|
118
|
+
#endif
|
119
|
+
|
120
|
+
/* For convenience, these vars (plus the bison vars far below)
|
121
|
+
are macros in the reentrant scanner. */
|
122
|
+
#define yyin yyg->yyin_r
|
123
|
+
#define yyout yyg->yyout_r
|
124
|
+
#define yyextra yyg->yyextra_r
|
125
|
+
#define yyleng yyg->yyleng_r
|
126
|
+
#define yytext yyg->yytext_r
|
127
|
+
#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno)
|
128
|
+
#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column)
|
129
|
+
#define yy_flex_debug yyg->yy_flex_debug_r
|
130
|
+
|
131
|
+
/* Size of default input buffer. */
|
132
|
+
#ifndef YY_BUF_SIZE
|
133
|
+
#ifdef __ia64__
|
134
|
+
/* On IA-64, the buffer size is 16k, not 8k.
|
135
|
+
* Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
|
136
|
+
* Ditto for the __ia64__ case accordingly.
|
137
|
+
*/
|
138
|
+
#define YY_BUF_SIZE 32768
|
139
|
+
#else
|
140
|
+
#define YY_BUF_SIZE 16384
|
141
|
+
#endif /* __ia64__ */
|
142
|
+
#endif
|
143
|
+
|
144
|
+
#ifndef YY_TYPEDEF_YY_BUFFER_STATE
|
145
|
+
#define YY_TYPEDEF_YY_BUFFER_STATE
|
146
|
+
typedef struct yy_buffer_state *YY_BUFFER_STATE;
|
147
|
+
#endif
|
148
|
+
|
149
|
+
#ifndef YY_TYPEDEF_YY_SIZE_T
|
150
|
+
#define YY_TYPEDEF_YY_SIZE_T
|
151
|
+
typedef size_t yy_size_t;
|
152
|
+
#endif
|
153
|
+
|
154
|
+
#ifndef YY_STRUCT_YY_BUFFER_STATE
|
155
|
+
#define YY_STRUCT_YY_BUFFER_STATE
|
156
|
+
struct yy_buffer_state
|
157
|
+
{
|
158
|
+
FILE *yy_input_file;
|
159
|
+
|
160
|
+
char *yy_ch_buf; /* input buffer */
|
161
|
+
char *yy_buf_pos; /* current position in input buffer */
|
162
|
+
|
163
|
+
/* Size of input buffer in bytes, not including room for EOB
|
164
|
+
* characters.
|
165
|
+
*/
|
166
|
+
yy_size_t yy_buf_size;
|
167
|
+
|
168
|
+
/* Number of characters read into yy_ch_buf, not including EOB
|
169
|
+
* characters.
|
170
|
+
*/
|
171
|
+
yy_size_t yy_n_chars;
|
172
|
+
|
173
|
+
/* Whether we "own" the buffer - i.e., we know we created it,
|
174
|
+
* and can realloc() it to grow it, and should free() it to
|
175
|
+
* delete it.
|
176
|
+
*/
|
177
|
+
int yy_is_our_buffer;
|
178
|
+
|
179
|
+
/* Whether this is an "interactive" input source; if so, and
|
180
|
+
* if we're using stdio for input, then we want to use getc()
|
181
|
+
* instead of fread(), to make sure we stop fetching input after
|
182
|
+
* each newline.
|
183
|
+
*/
|
184
|
+
int yy_is_interactive;
|
185
|
+
|
186
|
+
/* Whether we're considered to be at the beginning of a line.
|
187
|
+
* If so, '^' rules will be active on the next match, otherwise
|
188
|
+
* not.
|
189
|
+
*/
|
190
|
+
int yy_at_bol;
|
191
|
+
|
192
|
+
int yy_bs_lineno; /**< The line count. */
|
193
|
+
int yy_bs_column; /**< The column count. */
|
194
|
+
|
195
|
+
/* Whether to try to fill the input buffer when we reach the
|
196
|
+
* end of it.
|
197
|
+
*/
|
198
|
+
int yy_fill_buffer;
|
199
|
+
|
200
|
+
int yy_buffer_status;
|
201
|
+
|
202
|
+
};
|
203
|
+
#endif /* !YY_STRUCT_YY_BUFFER_STATE */
|
204
|
+
|
205
|
+
void linguist_yyrestart (FILE *input_file ,yyscan_t yyscanner );
|
206
|
+
void linguist_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
|
207
|
+
YY_BUFFER_STATE linguist_yy_create_buffer (FILE *file,int size ,yyscan_t yyscanner );
|
208
|
+
void linguist_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
|
209
|
+
void linguist_yy_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
|
210
|
+
void linguist_yypush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
|
211
|
+
void linguist_yypop_buffer_state (yyscan_t yyscanner );
|
212
|
+
|
213
|
+
YY_BUFFER_STATE linguist_yy_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner );
|
214
|
+
YY_BUFFER_STATE linguist_yy_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
|
215
|
+
YY_BUFFER_STATE linguist_yy_scan_bytes (yyconst char *bytes,yy_size_t len ,yyscan_t yyscanner );
|
216
|
+
|
217
|
+
void *linguist_yyalloc (yy_size_t ,yyscan_t yyscanner );
|
218
|
+
void *linguist_yyrealloc (void *,yy_size_t ,yyscan_t yyscanner );
|
219
|
+
void linguist_yyfree (void * ,yyscan_t yyscanner );
|
220
|
+
|
221
|
+
/* Begin user sect3 */
|
222
|
+
|
223
|
+
#define yytext_ptr yytext_r
|
224
|
+
|
225
|
+
#ifdef YY_HEADER_EXPORT_START_CONDITIONS
|
226
|
+
#define INITIAL 0
|
227
|
+
#define sgml 1
|
228
|
+
#define c_comment 2
|
229
|
+
#define xml_comment 3
|
230
|
+
#define haskell_comment 4
|
231
|
+
#define ocaml_comment 5
|
232
|
+
#define python_dcomment 6
|
233
|
+
#define python_scomment 7
|
234
|
+
|
235
|
+
#endif
|
236
|
+
|
237
|
+
#ifndef YY_NO_UNISTD_H
|
238
|
+
/* Special case for "unistd.h", since it is non-ANSI. We include it way
|
239
|
+
* down here because we want the user's section 1 to have been scanned first.
|
240
|
+
* The user has a chance to override it with an option.
|
241
|
+
*/
|
242
|
+
#include <unistd.h>
|
243
|
+
#endif
|
244
|
+
|
245
|
+
#define YY_EXTRA_TYPE struct tokenizer_extra *
|
246
|
+
|
247
|
+
int linguist_yylex_init (yyscan_t* scanner);
|
248
|
+
|
249
|
+
int linguist_yylex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner);
|
250
|
+
|
251
|
+
/* Accessor methods to globals.
|
252
|
+
These are made visible to non-reentrant scanners for convenience. */
|
253
|
+
|
254
|
+
int linguist_yylex_destroy (yyscan_t yyscanner );
|
255
|
+
|
256
|
+
int linguist_yyget_debug (yyscan_t yyscanner );
|
257
|
+
|
258
|
+
void linguist_yyset_debug (int debug_flag ,yyscan_t yyscanner );
|
259
|
+
|
260
|
+
YY_EXTRA_TYPE linguist_yyget_extra (yyscan_t yyscanner );
|
261
|
+
|
262
|
+
void linguist_yyset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner );
|
263
|
+
|
264
|
+
FILE *linguist_yyget_in (yyscan_t yyscanner );
|
265
|
+
|
266
|
+
void linguist_yyset_in (FILE * in_str ,yyscan_t yyscanner );
|
267
|
+
|
268
|
+
FILE *linguist_yyget_out (yyscan_t yyscanner );
|
269
|
+
|
270
|
+
void linguist_yyset_out (FILE * out_str ,yyscan_t yyscanner );
|
271
|
+
|
272
|
+
yy_size_t linguist_yyget_leng (yyscan_t yyscanner );
|
273
|
+
|
274
|
+
char *linguist_yyget_text (yyscan_t yyscanner );
|
275
|
+
|
276
|
+
int linguist_yyget_lineno (yyscan_t yyscanner );
|
277
|
+
|
278
|
+
void linguist_yyset_lineno (int line_number ,yyscan_t yyscanner );
|
279
|
+
|
280
|
+
int linguist_yyget_column (yyscan_t yyscanner );
|
281
|
+
|
282
|
+
void linguist_yyset_column (int column_no ,yyscan_t yyscanner );
|
283
|
+
|
284
|
+
/* Macros after this point can all be overridden by user definitions in
|
285
|
+
* section 1.
|
286
|
+
*/
|
287
|
+
|
288
|
+
#ifndef YY_SKIP_YYWRAP
|
289
|
+
#ifdef __cplusplus
|
290
|
+
extern "C" int linguist_yywrap (yyscan_t yyscanner );
|
291
|
+
#else
|
292
|
+
extern int linguist_yywrap (yyscan_t yyscanner );
|
293
|
+
#endif
|
294
|
+
#endif
|
295
|
+
|
296
|
+
#ifndef yytext_ptr
|
297
|
+
static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner);
|
298
|
+
#endif
|
299
|
+
|
300
|
+
#ifdef YY_NEED_STRLEN
|
301
|
+
static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner);
|
302
|
+
#endif
|
303
|
+
|
304
|
+
#ifndef YY_NO_INPUT
|
305
|
+
|
306
|
+
#endif
|
307
|
+
|
308
|
+
/* Amount of stuff to slurp up with each read. */
|
309
|
+
#ifndef YY_READ_BUF_SIZE
|
310
|
+
#ifdef __ia64__
|
311
|
+
/* On IA-64, the buffer size is 16k, not 8k */
|
312
|
+
#define YY_READ_BUF_SIZE 16384
|
313
|
+
#else
|
314
|
+
#define YY_READ_BUF_SIZE 8192
|
315
|
+
#endif /* __ia64__ */
|
316
|
+
#endif
|
317
|
+
|
318
|
+
/* Number of entries by which start-condition stack grows. */
|
319
|
+
#ifndef YY_START_STACK_INCR
|
320
|
+
#define YY_START_STACK_INCR 25
|
321
|
+
#endif
|
322
|
+
|
323
|
+
/* Default declaration of generated scanner - a define so the user can
|
324
|
+
* easily add parameters.
|
325
|
+
*/
|
326
|
+
#ifndef YY_DECL
|
327
|
+
#define YY_DECL_IS_OURS 1
|
328
|
+
|
329
|
+
extern int linguist_yylex (yyscan_t yyscanner);
|
330
|
+
|
331
|
+
#define YY_DECL int linguist_yylex (yyscan_t yyscanner)
|
332
|
+
#endif /* !YY_DECL */
|
333
|
+
|
334
|
+
/* yy_get_previous_state - get the state just before the EOB char was reached */
|
335
|
+
|
336
|
+
#undef YY_NEW_FILE
|
337
|
+
#undef YY_FLUSH_BUFFER
|
338
|
+
#undef yy_set_bol
|
339
|
+
#undef yy_new_buffer
|
340
|
+
#undef yy_set_interactive
|
341
|
+
#undef YY_DO_BEFORE_ACTION
|
342
|
+
|
343
|
+
#ifdef YY_DECL_IS_OURS
|
344
|
+
#undef YY_DECL_IS_OURS
|
345
|
+
#undef YY_DECL
|
346
|
+
#endif
|
347
|
+
|
348
|
+
#line 117 "tokenizer.l"
|
349
|
+
|
350
|
+
|
351
|
+
#line 352 "lex.linguist_yy.h"
|
352
|
+
#undef linguist_yyIN_HEADER
|
353
|
+
#endif /* linguist_yyHEADER_H */
|
@@ -0,0 +1,64 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "linguist.h"
|
3
|
+
#include "lex.linguist_yy.h"
|
4
|
+
|
5
|
+
/*
 * End-of-input hook required because tokenizer.l enables the `yywrap`
 * option.  It unconditionally returns 1; the scanner argument is not used.
 */
int linguist_yywrap(yyscan_t yyscanner) {
  (void) yyscanner;
  return 1;
}
|
8
|
+
|
9
|
+
/*
 * Linguist::Tokenizer#extract_tokens(data) -> Array of String
 *
 * Runs the flex scanner over at most the first MAX_SCAN_BYTES bytes of
 * `data` and collects the tokens it reports through `struct tokenizer_extra`:
 *   - REGULAR_TOKEN: pushed as-is;
 *   - SHEBANG_TOKEN: pushed with a "SHEBANG#!" prefix;
 *   - SGML_TOKEN:    pushed with a ">" suffix.
 *
 * Raises TypeError (via Check_Type) when `data` is not a String, a
 * SystemCallError when the scanner cannot be initialised, and NoMemoryError
 * when the lexer could not allocate a token.  Every token handed over by the
 * lexer is released with free() here.
 */
static VALUE rb_tokenizer_extract_tokens(VALUE self, VALUE rb_data) {
  enum { MAX_SCAN_BYTES = 100000 };
  YY_BUFFER_STATE buf;
  yyscan_t scanner;
  struct tokenizer_extra extra;
  VALUE ary, s;
  long len;
  int r;
  int out_of_memory = 0;

  Check_Type(rb_data, T_STRING);

  len = RSTRING_LEN(rb_data);
  if (len > MAX_SCAN_BYTES)
    len = MAX_SCAN_BYTES;

  extra.type = NO_ACTION;
  extra.token = NULL;

  /* A non-zero return means no scanner was created; do not touch `scanner`. */
  if (linguist_yylex_init_extra(&extra, &scanner) != 0)
    rb_sys_fail("linguist_yylex_init_extra");

  /* len is within [0, MAX_SCAN_BYTES], so the conversion is lossless. */
  buf = linguist_yy_scan_bytes(RSTRING_PTR(rb_data), (yy_size_t) len, scanner);

  ary = rb_ary_new();
  do {
    extra.type = NO_ACTION;
    extra.token = NULL;
    r = linguist_yylex(scanner);

    /* The lexer feeds strdup() results; NULL here means allocation failed. */
    if (extra.type != NO_ACTION && extra.token == NULL) {
      out_of_memory = 1;
      break;
    }

    switch (extra.type) {
    case NO_ACTION:
      break;
    case REGULAR_TOKEN:
      rb_ary_push(ary, rb_str_new2(extra.token));
      free(extra.token);
      break;
    case SHEBANG_TOKEN:
      s = rb_str_new2("SHEBANG#!");
      rb_str_cat2(s, extra.token);
      rb_ary_push(ary, s);
      free(extra.token);
      break;
    case SGML_TOKEN:
      s = rb_str_new2(extra.token);
      rb_str_cat2(s, ">");
      rb_ary_push(ary, s);
      free(extra.token);
      break;
    default:
      /* Unknown type: assumes the token is heap-allocated like every
       * feed_token() call in tokenizer.l, so release it and move on. */
      free(extra.token);
      break;
    }
  } while (r);

  linguist_yy_delete_buffer(buf, scanner);
  linguist_yylex_destroy(scanner);

  /* Raise only after the scanner has been torn down so nothing leaks. */
  if (out_of_memory)
    rb_memerror();

  return ary;
}
|
58
|
+
|
59
|
+
__attribute__((visibility("default"))) void Init_linguist() {
|
60
|
+
VALUE rb_mLinguist = rb_define_module("Linguist");
|
61
|
+
VALUE rb_cTokenizer = rb_define_class_under(rb_mLinguist, "Tokenizer", rb_cObject);
|
62
|
+
|
63
|
+
rb_define_method(rb_cTokenizer, "extract_tokens", rb_tokenizer_extract_tokens, 1);
|
64
|
+
}
|
@@ -0,0 +1,119 @@
|
|
1
|
+
%{
|
2
|
+
|
3
|
+
#include "linguist.h"
|
4
|
+
|
5
|
+
/*
 * Hand a token to the caller through the scanner's extra data: stores the
 * token pointer first, then its type.  Usable as a statement.
 */
#define feed_token(tok, typ) \
    ((void) (yyextra->token = (tok), yyextra->type = (typ)))
|
9
|
+
|
10
|
+
/*
 * Discard input up to and including the next newline.  Ends the scan
 * (yyterminate) when the input runs out first.  input() signals end of
 * input with EOF in flex 2.5.x scanners and with 0 in flex 2.6.x scanners,
 * so both are treated as end of input; otherwise the loop would never stop
 * with a newer flex.  As a consequence a NUL byte also ends the scan.
 */
#define eat_until_eol() do { \
    int c; \
    while ((c = input(yyscanner)) != '\n' && c != EOF && c != 0); \
    if (c == EOF || c == 0) \
      yyterminate(); \
  } while (0)
|
16
|
+
|
17
|
+
/*
 * Discard input up to and including the next occurrence of the character
 * `q` that is not preceded by a backslash, or up to and including the next
 * newline, whichever comes first.  A backslash swallows the character that
 * follows it.  Ends the scan (yyterminate) when the input runs out.
 * input() signals end of input with EOF in flex 2.5.x scanners and with 0
 * in flex 2.6.x scanners, so both are treated as end of input; a NUL byte
 * therefore also ends the scan.
 */
#define eat_until_unescaped(q) do { \
    int c; \
    while ((c = input(yyscanner)) != EOF && c != 0) { \
      if (c == '\n') \
        break; \
      if (c == '\\') { \
        c = input(yyscanner); \
        if (c == EOF || c == 0) \
          yyterminate(); \
      } else if (c == (q)) \
        break; \
    } \
    if (c == EOF || c == 0) \
      yyterminate(); \
  } while (0)
|
32
|
+
|
33
|
+
%}
|
34
|
+
|
35
|
+
%option never-interactive yywrap reentrant nounput warn nodefault header-file="lex.linguist_yy.h" extra-type="struct tokenizer_extra *" prefix="linguist_yy"
|
36
|
+
%x sgml c_comment xml_comment haskell_comment ocaml_comment python_dcomment python_scomment
|
37
|
+
|
38
|
+
%%
|
39
|
+
|
40
|
+
^#![ \t]*([[:alnum:]_\/]*\/)?env([ \t]+([^ \t=]*=[^ \t]*))*[ \t]+[[:alpha:]_]+ {
    /*
     * Shebang that goes through env, optionally with VAR=value
     * assignments.  The interpreter name is the trailing word of the
     * match; the pattern lets it be preceded by a space or a tab, so walk
     * back to whichever blank comes last instead of looking for a space
     * only.
     */
    const char *off = yytext + yyleng;
    while (off > yytext && off[-1] != ' ' && off[-1] != '\t')
      --off;
    feed_token(strdup(off), SHEBANG_TOKEN);
    eat_until_eol();
    return 1;
  }
|
50
|
+
|
51
|
+
^#![ \t]*[[:alpha:]_\/]+ {
    /*
     * Shebang naming the interpreter directly.  The token is whatever
     * follows the last slash of the match, or the whole match when it has
     * no slash.  A bare env (not handled by the rule above) produces no
     * token; the rest of the line is skipped either way.
     */
    const char *slash = strrchr(yytext, '/');
    const char *name = slash ? slash + 1 : yytext;
    if (strcmp(name, "env") != 0) {
      feed_token(strdup(name), SHEBANG_TOKEN);
      eat_until_eol();
      return 1;
    }
    eat_until_eol();
  }
|
65
|
+
|
66
|
+
^[ \t]*(\/\/|--|\#|%|\")" ".* { /* line comment introducer followed by a space: drop the line */ }

"/*" { BEGIN(c_comment); }
  /* See below for xml_comment start. */
"{-" { BEGIN(haskell_comment); }
"(*" { BEGIN(ocaml_comment); }
"\"\"\"" { BEGIN(python_dcomment); }
"'''" { BEGIN(python_scomment); }

<c_comment,xml_comment,haskell_comment,ocaml_comment,python_dcomment,python_scomment>.|\n { /* comment body: nothing */ }
<c_comment>"*/" |
<xml_comment>"-->" |
<haskell_comment>"-}" |
<ocaml_comment>"*)" |
<python_dcomment>"\"\"\"" |
<python_scomment>"'''" { /* matching terminator: back to normal scanning */ BEGIN(INITIAL); }

\"\"|'' { /* empty string literal: nothing */ }
\" { /* skip the rest of a double-quoted literal */ eat_until_unescaped('"'); }
' { /* skip the rest of a single-quoted literal */ eat_until_unescaped('\''); }
(0x[0-9a-fA-F]([0-9a-fA-F]|\.)*|[0-9]([0-9]|\.)*)([uU][lL]{0,2}|([eE][-+][0-9]*)?[fFlL]*) { /* numeric literal: nothing */ }
|
87
|
+
\<[^ \t\n\r<>]+/>|" "[^<>\n]{0,2048}> {
    /*
     * Opening of a tag.  The XML comment opener switches to the
     * xml_comment state; anything else is reported as an SGML token and
     * the attributes are then scanned in the sgml state.
     */
    if (strcmp(yytext, "<!--") != 0) {
      feed_token(strdup(yytext), SGML_TOKEN);
      BEGIN(sgml);
      return 1;
    }
    BEGIN(xml_comment);
  }
|
96
|
+
<sgml>[[:alnum:]_]+=/\" {
    /* Attribute with a double-quoted value: report the name and the equals
     * sign, consume the opening quote, then skip the value. */
    feed_token(strdup(yytext), REGULAR_TOKEN);
    input(yyscanner);
    eat_until_unescaped('"');
    return 1;
  }
<sgml>[[:alnum:]_]+=/' {
    /* Same as above for a single-quoted value. */
    feed_token(strdup(yytext), REGULAR_TOKEN);
    input(yyscanner);
    eat_until_unescaped('\'');
    return 1;
  }
<sgml>[[:alnum:]_]+=[[:alnum:]_]* {
    /* Unquoted value: keep the name and the equals sign, cut the value.
     * The copy is checked before it is modified so that a failed strdup
     * is passed on as NULL instead of being dereferenced here. */
    char *tok = strdup(yytext);
    if (tok)
      *(strchr(tok, '=') + 1) = 0;
    feed_token(tok, REGULAR_TOKEN);
    return 1;
  }
<sgml>[[:alnum:]_]+ {
    /* Attribute without a value. */
    feed_token(strdup(yytext), REGULAR_TOKEN);
    return 1;
  }
<sgml>\> { /* end of the tag */ BEGIN(INITIAL); }
<sgml>.|\n { /* nothing */ }
|
102
|
+
;|\{|\}|\(|\)|\[|\] {
    /* Statement and grouping punctuation is reported verbatim. */
    feed_token(strdup(yytext), REGULAR_TOKEN);
    return 1;
  }
[[:alnum:]_.@#/*]+ {
    /* Anything that does not open a C comment is an ordinary word. */
    if (strncmp(yytext, "/*", 2) != 0) {
      feed_token(strdup(yytext), REGULAR_TOKEN);
      return 1;
    }
    /* The match opens a C comment.  If it also carries the closing
     * marker at its end the comment is complete and is simply dropped;
     * otherwise keep skipping in the c_comment state. */
    if (yyleng < 4 || strcmp(yytext + yyleng - 2, "*/") != 0)
      BEGIN(c_comment);
  }
\<\<?|\+|\-|\*|\/|%|&&?|\|\|? {
    /* Operators are reported verbatim. */
    feed_token(strdup(yytext), REGULAR_TOKEN);
    return 1;
  }
.|\n { /* everything else: nothing */ }
|
117
|
+
|
118
|
+
%%
|
119
|
+
|