langscan 1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS.txt +19 -0
- data/History.txt +126 -0
- data/Manifest.txt +167 -0
- data/README.rdoc +89 -0
- data/Rakefile +40 -0
- data/ext/langscan/_make_c.rb +20 -0
- data/ext/langscan/_make_h.rb +30 -0
- data/ext/langscan/_template.c +134 -0
- data/ext/langscan/_template.h +53 -0
- data/ext/langscan/c/c/Makefile +157 -0
- data/ext/langscan/c/c/c.c +134 -0
- data/ext/langscan/c/c/c.h +66 -0
- data/ext/langscan/c/c/ctok.c +4622 -0
- data/ext/langscan/c/c/ctok.l +212 -0
- data/ext/langscan/c/c/extconf.rb +3 -0
- data/ext/langscan/c/c/modulename.txt +1 -0
- data/ext/langscan/c/c/tokenlist.txt +13 -0
- data/ext/langscan/csharp/csharp/Makefile +157 -0
- data/ext/langscan/csharp/csharp/csharp.c +134 -0
- data/ext/langscan/csharp/csharp/csharp.h +65 -0
- data/ext/langscan/csharp/csharp/csharptok.c +2965 -0
- data/ext/langscan/csharp/csharp/csharptok.l +200 -0
- data/ext/langscan/csharp/csharp/extconf.rb +3 -0
- data/ext/langscan/csharp/csharp/modulename.txt +1 -0
- data/ext/langscan/csharp/csharp/tokenlist.txt +12 -0
- data/ext/langscan/d/d/Makefile +157 -0
- data/ext/langscan/d/d/d.c +134 -0
- data/ext/langscan/d/d/d.h +64 -0
- data/ext/langscan/d/d/dtok.c +5461 -0
- data/ext/langscan/d/d/dtok.l +282 -0
- data/ext/langscan/d/d/extconf.rb +3 -0
- data/ext/langscan/d/d/modulename.txt +1 -0
- data/ext/langscan/d/d/tokenlist.txt +11 -0
- data/ext/langscan/elisp/elisp/Makefile +157 -0
- data/ext/langscan/elisp/elisp/elisp.c +134 -0
- data/ext/langscan/elisp/elisp/elisp.h +62 -0
- data/ext/langscan/elisp/elisp/elisptok.c +2101 -0
- data/ext/langscan/elisp/elisp/elisptok.l +151 -0
- data/ext/langscan/elisp/elisp/extconf.rb +3 -0
- data/ext/langscan/elisp/elisp/modulename.txt +1 -0
- data/ext/langscan/elisp/elisp/tokenlist.txt +9 -0
- data/ext/langscan/java/java/Makefile +157 -0
- data/ext/langscan/java/java/extconf.rb +3 -0
- data/ext/langscan/java/java/java.c +134 -0
- data/ext/langscan/java/java/java.h +64 -0
- data/ext/langscan/java/java/javatok.c +2090 -0
- data/ext/langscan/java/java/javatok.l +155 -0
- data/ext/langscan/java/java/modulename.txt +1 -0
- data/ext/langscan/java/java/tokenlist.txt +11 -0
- data/ext/langscan/javascript/javascript/Makefile +157 -0
- data/ext/langscan/javascript/javascript/extconf.rb +3 -0
- data/ext/langscan/javascript/javascript/javascript.c +134 -0
- data/ext/langscan/javascript/javascript/javascript.h +63 -0
- data/ext/langscan/javascript/javascript/javascripttok.c +2051 -0
- data/ext/langscan/javascript/javascript/javascripttok.l +147 -0
- data/ext/langscan/javascript/javascript/modulename.txt +1 -0
- data/ext/langscan/javascript/javascript/tokenlist.txt +10 -0
- data/ext/langscan/pairmatcher/pairmatcher/Makefile +157 -0
- data/ext/langscan/pairmatcher/pairmatcher/extconf.rb +3 -0
- data/ext/langscan/pairmatcher/pairmatcher/pairmatcher.c +890 -0
- data/ext/langscan/php/php/Makefile +157 -0
- data/ext/langscan/php/php/extconf.rb +3 -0
- data/ext/langscan/php/php/modulename.txt +1 -0
- data/ext/langscan/php/php/php.c +134 -0
- data/ext/langscan/php/php/php.h +64 -0
- data/ext/langscan/php/php/phptok.c +2406 -0
- data/ext/langscan/php/php/phptok.l +212 -0
- data/ext/langscan/php/php/tokenlist.txt +11 -0
- data/ext/langscan/post-distclean.rb +21 -0
- data/ext/langscan/pre-config.rb +57 -0
- data/ext/langscan/python/python/Makefile +157 -0
- data/ext/langscan/python/python/extconf.rb +3 -0
- data/ext/langscan/python/python/modulename.txt +1 -0
- data/ext/langscan/python/python/python.c +134 -0
- data/ext/langscan/python/python/python.h +61 -0
- data/ext/langscan/python/python/pythontok.c +2102 -0
- data/ext/langscan/python/python/pythontok.l +155 -0
- data/ext/langscan/python/python/tokenlist.txt +8 -0
- data/ext/langscan/ruby/compat/ripper/Makefile +158 -0
- data/ext/langscan/ruby/compat/ripper/depend +1 -0
- data/ext/langscan/ruby/compat/ripper/extconf.rb +4 -0
- data/ext/langscan/ruby/compat/ripper/include/eventids1.c +251 -0
- data/ext/langscan/ruby/compat/ripper/include/eventids2.c +277 -0
- data/ext/langscan/ruby/compat/ripper/include/lex.c +138 -0
- data/ext/langscan/ruby/compat/ripper/ripper.c +14420 -0
- data/ext/langscan/scheme/scheme/Makefile +157 -0
- data/ext/langscan/scheme/scheme/extconf.rb +3 -0
- data/ext/langscan/scheme/scheme/modulename.txt +1 -0
- data/ext/langscan/scheme/scheme/scheme.c +134 -0
- data/ext/langscan/scheme/scheme/scheme.h +60 -0
- data/ext/langscan/scheme/scheme/schemetok.c +2447 -0
- data/ext/langscan/scheme/scheme/schemetok.l +177 -0
- data/ext/langscan/scheme/scheme/tokenlist.txt +7 -0
- data/ext/langscan/sh/sh/Makefile +157 -0
- data/ext/langscan/sh/sh/extconf.rb +3 -0
- data/ext/langscan/sh/sh/modulename.txt +1 -0
- data/ext/langscan/sh/sh/sh.c +134 -0
- data/ext/langscan/sh/sh/sh.h +61 -0
- data/ext/langscan/sh/sh/shtok.c +2470 -0
- data/ext/langscan/sh/sh/shtok.l +325 -0
- data/ext/langscan/sh/sh/tokenlist.txt +8 -0
- data/lib/langscan.rb +124 -0
- data/lib/langscan/_common.rb +50 -0
- data/lib/langscan/_easyscanner.rb +78 -0
- data/lib/langscan/_pairmatcher.rb +46 -0
- data/lib/langscan/_type.rb +125 -0
- data/lib/langscan/autoconf.rb +51 -0
- data/lib/langscan/automake.rb +51 -0
- data/lib/langscan/brainfuck.rb +48 -0
- data/lib/langscan/c.rb +144 -0
- data/lib/langscan/csharp.rb +101 -0
- data/lib/langscan/css.rb +109 -0
- data/lib/langscan/d.rb +201 -0
- data/lib/langscan/eiffel.rb +167 -0
- data/lib/langscan/elisp.rb +132 -0
- data/lib/langscan/io.rb +84 -0
- data/lib/langscan/java.rb +95 -0
- data/lib/langscan/javascript.rb +97 -0
- data/lib/langscan/lua.rb +116 -0
- data/lib/langscan/ocaml.rb +298 -0
- data/lib/langscan/ocaml/camlexer.ml +28 -0
- data/lib/langscan/ocaml/lexer.mll +230 -0
- data/lib/langscan/ocaml/types.ml +36 -0
- data/lib/langscan/perl.rb +87 -0
- data/lib/langscan/perl/tokenizer.pl +231 -0
- data/lib/langscan/php.rb +80 -0
- data/lib/langscan/python.rb +101 -0
- data/lib/langscan/rpmspec.rb +71 -0
- data/lib/langscan/ruby.rb +164 -0
- data/lib/langscan/ruby/compat/README +5 -0
- data/lib/langscan/ruby/compat/ripper.rb +4 -0
- data/lib/langscan/ruby/compat/ripper/core.rb +918 -0
- data/lib/langscan/ruby/compat/ripper/filter.rb +70 -0
- data/lib/langscan/ruby/compat/ripper/lexer.rb +179 -0
- data/lib/langscan/ruby/compat/ripper/sexp.rb +100 -0
- data/lib/langscan/scheme.rb +160 -0
- data/lib/langscan/sh.rb +116 -0
- data/lib/langscan/text.rb +37 -0
- data/metaconfig +2 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/makemanifest.rb +21 -0
- data/setup.rb +1604 -0
- data/tasks/extconf.rake +13 -0
- data/tasks/extconf/langscan.rake +42 -0
- data/test/langscan/brainfuck/test/test_scan.rb +55 -0
- data/test/langscan/c/test/test_scan.rb +216 -0
- data/test/langscan/c/test/test_token.rb +41 -0
- data/test/langscan/csharp/test/test_scan.rb +157 -0
- data/test/langscan/css/test/test_css.rb +79 -0
- data/test/langscan/d/test/test_scan.rb +233 -0
- data/test/langscan/d/test/test_token.rb +205 -0
- data/test/langscan/eiffel/test/test_eiffel.rb +95 -0
- data/test/langscan/elisp/test/test_elisp.rb +177 -0
- data/test/langscan/io/test/test_io.rb +79 -0
- data/test/langscan/java/test/test_java.rb +74 -0
- data/test/langscan/javascript/test/test_javascript.rb +39 -0
- data/test/langscan/lua/test/test_lua.rb +69 -0
- data/test/langscan/ocaml/test/test_ocaml.rb +161 -0
- data/test/langscan/php/test/test_scan.rb +138 -0
- data/test/langscan/python/test/test_scan.rb +105 -0
- data/test/langscan/rpmspec/test/test_rpmspec.rb +51 -0
- data/test/langscan/ruby/test/test_scan.rb +71 -0
- data/test/langscan/scheme/test/test_scan.rb +198 -0
- data/test/test_helper.rb +7 -0
- data/test/test_langscan.rb +123 -0
- metadata +296 -0
@@ -0,0 +1,325 @@
|
|
1
|
+
/*
|
2
|
+
* shtok.l - a lex rule for shell scripts
|
3
|
+
*
|
4
|
+
* Copyright (C) 2005 Kenichi Ishibashi <bashi at dream.ie.ariake-nct.ac.jp>
|
5
|
+
* All rights reserved.
|
6
|
+
* This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
*
|
8
|
+
* You can redistribute it and/or modify it under the terms of
|
9
|
+
* the GNU General Public License version 2.
|
10
|
+
*/
|
11
|
+
|
12
|
+
%option reentrant
|
13
|
+
%option prefix="langscan_sh_lex_"
|
14
|
+
%option noyywrap
|
15
|
+
%option nodefault
|
16
|
+
%option stack
|
17
|
+
%s DQUOTE
|
18
|
+
%s BQUOTE
|
19
|
+
%s BRACE_SUBST
|
20
|
+
%s PAREN_SUBST
|
21
|
+
%s IN_BRACE
|
22
|
+
%s HEREDOC_DELIMITER
|
23
|
+
%s HEREDOC
|
24
|
+
|
25
|
+
space [ \t]+
|
26
|
+
newline \r\n|\r|\n
|
27
|
+
escseq \\({newline}|.)
|
28
|
+
ident [0-9A-Za-z_][0-9A-Za-z_\-\.]*
|
29
|
+
squote \'[^\']*\'
|
30
|
+
specialvar (\$|\#|\*|@|\?|\-|\!|\_)
|
31
|
+
var_ident ([A-Za-z_][0-9A-Za-z_]*|[0-9]|{specialvar})
|
32
|
+
|
33
|
+
%{
|
34
|
+
|
35
|
+
#include "sh.h"
|
36
|
+
|
37
|
+
#define YY_EXTRA_TYPE langscan_sh_lex_extra_t *
|
38
|
+
|
39
|
+
#if YY_NULL != 0
|
40
|
+
#error "YY_NULL is not 0."
|
41
|
+
#endif
|
42
|
+
|
43
|
+
#define YY_DECL langscan_sh_token_t langscan_sh_lex_lex(yyscan_t yyscanner)
|
44
|
+
|
45
|
+
#define YY_INPUT(buf,result,max_size) \
|
46
|
+
if (!yyextra->eof) { \
|
47
|
+
result = yyextra->user_read(&(yyextra->user_data), (buf), (max_size)); \
|
48
|
+
if (result == 0) \
|
49
|
+
yyextra->eof = 1; \
|
50
|
+
}
|
51
|
+
|
52
|
+
#define UPD update_pos(yyextra, yytext, yyleng)
|
53
|
+
static void update_pos(langscan_sh_lex_extra_t *, char *, int);
|
54
|
+
|
55
|
+
#define report(token) \
|
56
|
+
do { \
|
57
|
+
yyextra->text = yytext; \
|
58
|
+
yyextra->leng = yyleng; \
|
59
|
+
return langscan_sh_##token; \
|
60
|
+
} while (0)
|
61
|
+
|
62
|
+
#define PUSH_STATE(state) yy_push_state(state, yyscanner)
|
63
|
+
#define POP_STATE yy_pop_state(yyscanner)
|
64
|
+
|
65
|
+
static int ident_length(unsigned char *ptr, int max);
|
66
|
+
|
67
|
+
static char *heredoc_delimiter;
|
68
|
+
static enum { HEREDOC_TAB_NO_STRIP, HEREDOC_TAB_STRIP } heredoc_type;
|
69
|
+
static int set_heredoc_delimiter(unsigned char *ptr, int max);
|
70
|
+
|
71
|
+
%}
|
72
|
+
|
73
|
+
%%
|
74
|
+
<INITIAL,IN_BRACE>\<\<\-? {
|
75
|
+
if (yytext[yyleng - 1] == '-')
|
76
|
+
heredoc_type = HEREDOC_TAB_STRIP;
|
77
|
+
else
|
78
|
+
heredoc_type = HEREDOC_TAB_NO_STRIP;
|
79
|
+
UPD;
|
80
|
+
PUSH_STATE(HEREDOC_DELIMITER);
|
81
|
+
report(punct);
|
82
|
+
}
|
83
|
+
<HEREDOC_DELIMITER>[^ \t\r\n].* {
|
84
|
+
int delimiter_leng;
|
85
|
+
delimiter_leng = set_heredoc_delimiter(yytext, yyleng);
|
86
|
+
if (delimiter_leng == -1) YY_FATAL_ERROR("Can't allocate memory");
|
87
|
+
yyless(delimiter_leng);
|
88
|
+
PUSH_STATE(HEREDOC);
|
89
|
+
UPD;
|
90
|
+
report(heredoc_beg);
|
91
|
+
}
|
92
|
+
<HEREDOC>^.+ {
|
93
|
+
int sleng;
|
94
|
+
sleng = 0;
|
95
|
+
if (heredoc_type == HEREDOC_TAB_STRIP) {
|
96
|
+
while (yytext[sleng] == ' ' || yytext[sleng] =='\t') {
|
97
|
+
sleng++;
|
98
|
+
if (sleng >= yyleng) { UPD; report(space); }
|
99
|
+
}
|
100
|
+
}
|
101
|
+
if (strcmp((yytext + sleng), heredoc_delimiter) == 0) { /* end-of-heredoc */
|
102
|
+
free(heredoc_delimiter);
|
103
|
+
POP_STATE;
|
104
|
+
POP_STATE;
|
105
|
+
UPD; report(heredoc_end);
|
106
|
+
}
|
107
|
+
else {
|
108
|
+
UPD; report(string);
|
109
|
+
}
|
110
|
+
}
|
111
|
+
<HEREDOC><<EOF>> {
|
112
|
+
free(heredoc_delimiter);
|
113
|
+
BEGIN(INITIAL);
|
114
|
+
UPD; report(string);
|
115
|
+
}
|
116
|
+
|
117
|
+
|
118
|
+
<INITIAL,BQUOTE,BRACE_SUBST,PAREN_SUBST,IN_BRACE>\" {
|
119
|
+
PUSH_STATE(DQUOTE);
|
120
|
+
UPD; report(punct);
|
121
|
+
}
|
122
|
+
<DQUOTE>\" {
|
123
|
+
POP_STATE;
|
124
|
+
UPD; report(punct);
|
125
|
+
}
|
126
|
+
<DQUOTE><<EOF>> { BEGIN(INITIAL); }
|
127
|
+
<DQUOTE>([^\"\`\$\\]|{escseq})+ { UPD; report(string); }
|
128
|
+
|
129
|
+
|
130
|
+
<INITIAL,DQUOTE,BRACE_SUBST,PAREN_SUBST,IN_BRACE>\` {
|
131
|
+
PUSH_STATE(BQUOTE);
|
132
|
+
UPD; report(punct);
|
133
|
+
}
|
134
|
+
<BQUOTE>\` {
|
135
|
+
POP_STATE;
|
136
|
+
UPD; report(punct);
|
137
|
+
}
|
138
|
+
<BQUOTE><<EOF>> { BEGIN(INITIAL); }
|
139
|
+
|
140
|
+
|
141
|
+
<INITIAL,DQUOTE,BQUOTE,PAREN_SUBST,IN_BRACE>\$\{ {
|
142
|
+
PUSH_STATE(BRACE_SUBST);
|
143
|
+
UPD; report(punct);
|
144
|
+
}
|
145
|
+
<BRACE_SUBST>\} {
|
146
|
+
POP_STATE;
|
147
|
+
UPD; report(punct);
|
148
|
+
}
|
149
|
+
<BRACE_SUBST>{var_ident} { UPD; report(ident); }
|
150
|
+
<BRACE_SUBST><<EOF>> { BEGIN(INITIAL); }
|
151
|
+
|
152
|
+
|
153
|
+
\{ {
|
154
|
+
PUSH_STATE(IN_BRACE);
|
155
|
+
UPD; report(punct);
|
156
|
+
}
|
157
|
+
<IN_BRACE>\} {
|
158
|
+
POP_STATE;
|
159
|
+
UPD; report(punct);
|
160
|
+
}
|
161
|
+
<IN_BRACE><<EOF>> { BEGIN(INITIAL); }
|
162
|
+
|
163
|
+
|
164
|
+
\$?\( {
|
165
|
+
PUSH_STATE(PAREN_SUBST);
|
166
|
+
UPD; report(punct);
|
167
|
+
}
|
168
|
+
<PAREN_SUBST>\) {
|
169
|
+
POP_STATE;
|
170
|
+
UPD; report(punct);
|
171
|
+
}
|
172
|
+
<PAREN_SUBST><<EOF>> { BEGIN(INITIAL); }
|
173
|
+
|
174
|
+
|
175
|
+
<INITIAL,BQUOTE,PAREN_SUBST,IN_BRACE>^\#.* { UPD; report(comment); }
|
176
|
+
<INITIAL,BQUOTE,PAREN_SUBST,IN_BRACE>{space}\#.* { UPD; report(comment); }
|
177
|
+
<INITIAL,BQUOTE,BRACE_SUBST,PAREN_SUBST,IN_BRACE>{squote} { UPD; report(string); }
|
178
|
+
|
179
|
+
{space} { UPD; report(space); }
|
180
|
+
{newline} { UPD; report(space); }
|
181
|
+
{ident}[ \t]*\([ \t]*\) { yyless(ident_length(yytext, yyleng)); UPD; report(fundef); }
|
182
|
+
{ident} { UPD; report(ident); }
|
183
|
+
\${var_ident} { UPD; report(ident); }
|
184
|
+
\\. { UPD; report(punct); }
|
185
|
+
>=|<=|!=|\;\;|\<\<\<|&&|\|\||>&|<& { UPD; report(punct); }
|
186
|
+
. { UPD; report(punct); }
|
187
|
+
|
188
|
+
%%
|
189
|
+
|
190
|
+
static void update_pos(
|
191
|
+
langscan_sh_lex_extra_t *extra,
|
192
|
+
char *text,
|
193
|
+
int leng)
|
194
|
+
{
|
195
|
+
int i, j;
|
196
|
+
extra->beg_byteno = extra->end_byteno;
|
197
|
+
extra->beg_lineno = extra->end_lineno;
|
198
|
+
extra->beg_columnno = extra->end_columnno;
|
199
|
+
j = 0;
|
200
|
+
for (i = 0; i < leng; i++) {
|
201
|
+
if (text[i] == '\n') {
|
202
|
+
extra->end_lineno++;
|
203
|
+
j = i + 1;
|
204
|
+
extra->end_columnno = 0;
|
205
|
+
}
|
206
|
+
}
|
207
|
+
extra->end_columnno += leng - j;
|
208
|
+
extra->end_byteno += leng;
|
209
|
+
}
|
210
|
+
|
211
|
+
/*
 * Return the length of the leading run of identifier characters
 * (ASCII digits, ASCII letters, '_', '-' and '.') at ptr, looking at no
 * more than max bytes.
 */
static int ident_length(unsigned char *ptr, int max)
{
  int count;

  for (count = 0; count < max; count++) {
    unsigned char c = ptr[count];
    int is_ident_char =
      (c >= '0' && c <= '9') ||
      (c >= 'A' && c <= 'Z') ||
      (c >= 'a' && c <= 'z') ||
      c == '_' || c == '-' || c == '.';

    if (!is_ident_char)
      break;
  }
  return count;
}
|
225
|
+
|
226
|
+
static int set_heredoc_delimiter(unsigned char *ptr, int max)
|
227
|
+
{
|
228
|
+
char *dst, quote_char;
|
229
|
+
int in_quote, len;
|
230
|
+
heredoc_delimiter = malloc(max + 1);
|
231
|
+
if (heredoc_delimiter == NULL) return -1;
|
232
|
+
dst = heredoc_delimiter;
|
233
|
+
len = 0;
|
234
|
+
in_quote = 0;
|
235
|
+
while (len < max) {
|
236
|
+
if (in_quote == 0) { /* unquoted delimiter */
|
237
|
+
if (*ptr == '\'' || *ptr == '\"') {
|
238
|
+
quote_char = *ptr;
|
239
|
+
in_quote = 1;
|
240
|
+
ptr++;
|
241
|
+
if (++len >= max) break;
|
242
|
+
continue;
|
243
|
+
}
|
244
|
+
if (*ptr == ' ' || *ptr == '\t') break;
|
245
|
+
if (*ptr == '\\') {
|
246
|
+
ptr++;
|
247
|
+
if (++len >= max) break;
|
248
|
+
}
|
249
|
+
}
|
250
|
+
else { /* quoted delimiter */
|
251
|
+
if (*ptr == quote_char) {
|
252
|
+
in_quote = 0;
|
253
|
+
ptr++;
|
254
|
+
if (len++ >= max) break;
|
255
|
+
continue;
|
256
|
+
}
|
257
|
+
}
|
258
|
+
*dst++ = *ptr++;
|
259
|
+
len++;
|
260
|
+
}
|
261
|
+
*dst = '\0';
|
262
|
+
return len;
|
263
|
+
}
|
264
|
+
|
265
|
+
langscan_sh_tokenizer_t *langscan_sh_make_tokenizer(
|
266
|
+
size_t (*user_read)(void **user_data_p, char *buf, size_t maxlen),
|
267
|
+
void *user_data)
|
268
|
+
{
|
269
|
+
langscan_sh_tokenizer_t *tokenizer;
|
270
|
+
langscan_sh_lex_extra_t *extra;
|
271
|
+
tokenizer = (langscan_sh_tokenizer_t *)malloc(sizeof(langscan_sh_tokenizer_t));
|
272
|
+
if (tokenizer == NULL)
|
273
|
+
return NULL;
|
274
|
+
extra = (langscan_sh_lex_extra_t *)malloc(sizeof(langscan_sh_lex_extra_t));
|
275
|
+
if (extra == NULL)
|
276
|
+
return NULL;
|
277
|
+
extra->user_read = user_read;
|
278
|
+
extra->user_data = user_data;
|
279
|
+
extra->beg_lineno = 1;
|
280
|
+
extra->beg_columnno = 0;
|
281
|
+
extra->beg_byteno = 0;
|
282
|
+
extra->end_lineno = 1;
|
283
|
+
extra->end_columnno = 0;
|
284
|
+
extra->end_byteno = 0;
|
285
|
+
extra->eof = 0;
|
286
|
+
tokenizer->extra = extra;
|
287
|
+
langscan_sh_lex_lex_init(&tokenizer->scanner);
|
288
|
+
langscan_sh_lex_set_extra(extra, tokenizer->scanner);
|
289
|
+
return tokenizer;
|
290
|
+
}
|
291
|
+
|
292
|
+
/*
 * Run the generated scanner once on this tokenizer and return the token
 * it reports.
 */
langscan_sh_token_t langscan_sh_get_token(langscan_sh_tokenizer_t *tokenizer)
{
  langscan_sh_token_t token;

  token = langscan_sh_lex_lex(tokenizer->scanner);
  return token;
}
|
296
|
+
|
297
|
+
/*
 * Release a tokenizer created by langscan_sh_make_tokenizer(): the
 * extra data attached to the scanner, the scanner itself, and the
 * tokenizer structure.
 *
 * Passing NULL is a no-op (like free()), because
 * langscan_sh_make_tokenizer() returns NULL on failure.
 */
void langscan_sh_free_tokenizer(langscan_sh_tokenizer_t *tokenizer)
{
  langscan_sh_lex_extra_t *extra;

  if (tokenizer == NULL)
    return;

  /* Fetch the extra data before the scanner that holds it is destroyed. */
  extra = langscan_sh_lex_get_extra(tokenizer->scanner);
  free((void *)extra);
  langscan_sh_lex_lex_destroy(tokenizer->scanner);
  free((void *)tokenizer);
}
|
304
|
+
|
305
|
+
/* Return the read callback stored in the tokenizer's extra data. */
user_read_t langscan_sh_tokenizer_get_user_read(langscan_sh_tokenizer_t *tokenizer)
{
  langscan_sh_lex_extra_t *extra = tokenizer->extra;

  return extra->user_read;
}
|
309
|
+
|
310
|
+
/* Return the user data pointer stored in the tokenizer's extra data. */
void *langscan_sh_tokenizer_get_user_data(langscan_sh_tokenizer_t *tokenizer)
{
  langscan_sh_lex_extra_t *extra = tokenizer->extra;

  return extra->user_data;
}
|
314
|
+
|
315
|
+
/*
 * Return the printable name of a token value.
 *
 * Index 0 is "*eof*"; the remaining names are generated from
 * LANGSCAN_SH_TOKEN_LIST in list order.  A value outside the table
 * yields NULL instead of reading past the end of the array.
 */
const char *langscan_sh_token_name(langscan_sh_token_t token)
{
  static const char *const token_names[] = {
    "*eof*",
#define LANGSCAN_SH_TOKEN(name) #name,
    LANGSCAN_SH_TOKEN_LIST
#undef LANGSCAN_SH_TOKEN
  };
  const size_t count = sizeof(token_names) / sizeof(token_names[0]);

  /* The cast also turns a negative value into an out-of-range index. */
  if ((size_t)token >= count)
    return NULL;
  return token_names[token];
}
|
data/lib/langscan.rb
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
#
|
2
|
+
# langscan.rb - an interface module of LangScan
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
module LangScan
  VERSION = "1.2"

  # Maps a file extension (as returned by File.extname, e.g. ".c") to the
  # scanner module registered for it.
  LangScanRegistry = {}

  # Number of leading characters of a file that are searched for an Emacs
  # "-*- mode: ... -*-" line.
  MODE_LINE_SCAN_LENGTH = 512

  module_function

  # Require every *.rb file found in a _plugin_path_ directory below any
  # $LOAD_PATH entry. Files whose name starts with "_" are skipped.
  # A plugin that fails with LoadError is silently ignored, so one
  # unavailable scanner does not prevent the others from loading.
  def load_plugins(plugin_path)
    $LOAD_PATH.each {|path|
      candidate_path = File.join(path, plugin_path)
      next unless File.directory?(candidate_path)
      Dir.entries(candidate_path).each {|entry|
        next unless File.extname(entry) == ".rb"
        next if /^_/.match(entry)
        begin
          require(File.join(plugin_path, entry))
        rescue LoadError
          # ignore load errors
        end
      }
    }
  end

  # Load the LangScan modules found in the "langscan" plugin directories.
  def load
    load_plugins("langscan")
  end

  # Validate that _mod_ is a LangScan module: it must respond to +name+,
  # +abbrev+ and +scan+, and its abbreviation must consist solely of
  # lowercase ASCII letters.
  #
  # The abbreviation is interpolated unescaped into a regular expression
  # by +choose_by_shebang+, so the whole string is checked with \A and \z;
  # ^ and $ would accept a multi-line value such as "ok\n.*".
  #
  # Raises RuntimeError when the check fails.
  def validate_module(mod)
    [:name, :abbrev, :scan].each {|method|
      raise "#{mod.to_s} lacks #{method}" unless mod.respond_to?(method)
    }
    unless /\A[a-z]+\z/.match(mod.abbrev)
      raise "#{mod.to_s} invalid abbreviation: #{mod.abbrev}"
    end
  end

  # Register a new LangScan module _mod_ for every extension listed in
  # <tt>mod.extnames</tt> and return that list.
  #
  # Raises RuntimeError when _mod_ is not valid or when one of its
  # extensions is already taken. All extensions are checked before the
  # registry is touched, so a failed call registers nothing.
  def register(mod)
    validate_module(mod)
    extnames = mod.extnames
    pending = {}
    extnames.each {|extname|
      # An extension listed twice by _mod_ itself counts as a conflict too.
      owner = LangScanRegistry[extname] || pending[extname]
      raise "#{extname} is already used by #{owner.abbrev}" if owner
      pending[extname] = mod
    }
    LangScanRegistry.update(pending)
    extnames
  end

  # Return an array containing the registered LangScan modules, each one
  # listed once.
  def modules
    LangScanRegistry.values.uniq
  end

  # Return the LangScan module chosen by the shebang line of _content_:
  # the first registered module whose abbreviation appears at a word
  # boundary in a first line that starts with "#!".
  # Return +nil+ if no suitable LangScan module is found.
  def choose_by_shebang(content)
    first_line = ""
    content.each_line {|line|
      first_line = line
      break
    }
    LangScanRegistry.each_value {|scanner|
      # The abbreviation needs no quoting: validate_module restricts it
      # to [a-z]+.
      regexp = /^#!.*\b#{scanner.abbrev}/i
      return scanner if regexp.match(first_line)
    }
    nil
  end

  # Return the LangScan module chosen by an Emacs mode line.
  # If _content_ contains "-*- mode: c -*-", +choose_by_emacs_mode+ returns
  # LangScan::C. Only the first MODE_LINE_SCAN_LENGTH characters are
  # searched. A module name is matched case-insensitively with whitespace
  # replaced by "-"; for a name containing "/" ("C/C++" etc.) each part is
  # accepted as well.
  # Return +nil+ if no suitable LangScan module is found.
  def choose_by_emacs_mode(content)
    chunk = content[0, MODE_LINE_SCAN_LENGTH]
    LangScanRegistry.each_value {|scanner|
      mode = Regexp.quote(scanner.name.downcase.gsub(/\s+/, "-"))
      if scanner.name.include?("/") # "C/C++" etc.
        parts = scanner.name.split("/").map {|part| Regexp.quote(part) }
        mode = "(?:#{mode}|#{parts.join("|")})"
      end
      regexp = /-\*-\s+mode:\s+#{mode}\s+-\*-/i
      return scanner if regexp.match(chunk)
    }
    nil
  end

  # Return the LangScan module chosen by _content_.
  # +choose_by_content+ tries +choose_by_shebang+ first and then
  # +choose_by_emacs_mode+.
  def choose_by_content(content)
    choose_by_shebang(content) || choose_by_emacs_mode(content)
  end

  # Return the suitable LangScan module for _file_name_. The file
  # extension is looked up first; when it is not registered and _content_
  # is given, the content is inspected instead.
  # Return +nil+ if no suitable LangScan module is found.
  def choose(file_name, content = nil)
    scanner = LangScanRegistry[File.extname(file_name)]
    scanner = choose_by_content(content) if scanner.nil? && content
    scanner
  end

  # Return true if the extension of _file_name_ is registered with
  # LangScan.
  def support?(file_name)
    LangScanRegistry.include?(File.extname(file_name))
  end
end
|
123
|
+
|
124
|
+
LangScan.load
|