langscan 1.2-x86-mswin32-60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS.txt +19 -0
- data/History.txt +126 -0
- data/Manifest.txt +167 -0
- data/README.rdoc +91 -0
- data/Rakefile +40 -0
- data/ext/langscan/_make_c.rb +20 -0
- data/ext/langscan/_make_h.rb +30 -0
- data/ext/langscan/_template.c +134 -0
- data/ext/langscan/_template.h +53 -0
- data/ext/langscan/c/c/Makefile +188 -0
- data/ext/langscan/c/c/c.c +134 -0
- data/ext/langscan/c/c/c.h +66 -0
- data/ext/langscan/c/c/ctok.c +4629 -0
- data/ext/langscan/c/c/ctok.l +212 -0
- data/ext/langscan/c/c/extconf.rb +3 -0
- data/ext/langscan/c/c/modulename.txt +1 -0
- data/ext/langscan/c/c/tokenlist.txt +13 -0
- data/ext/langscan/csharp/csharp/Makefile +188 -0
- data/ext/langscan/csharp/csharp/csharp.c +134 -0
- data/ext/langscan/csharp/csharp/csharp.h +65 -0
- data/ext/langscan/csharp/csharp/csharptok.c +2971 -0
- data/ext/langscan/csharp/csharp/csharptok.l +200 -0
- data/ext/langscan/csharp/csharp/extconf.rb +3 -0
- data/ext/langscan/csharp/csharp/modulename.txt +1 -0
- data/ext/langscan/csharp/csharp/tokenlist.txt +12 -0
- data/ext/langscan/d/d/Makefile +188 -0
- data/ext/langscan/d/d/d.c +134 -0
- data/ext/langscan/d/d/d.h +64 -0
- data/ext/langscan/d/d/dtok.c +5468 -0
- data/ext/langscan/d/d/dtok.l +282 -0
- data/ext/langscan/d/d/extconf.rb +3 -0
- data/ext/langscan/d/d/modulename.txt +1 -0
- data/ext/langscan/d/d/tokenlist.txt +11 -0
- data/ext/langscan/elisp/elisp/Makefile +188 -0
- data/ext/langscan/elisp/elisp/elisp.c +134 -0
- data/ext/langscan/elisp/elisp/elisp.h +62 -0
- data/ext/langscan/elisp/elisp/elisptok.c +2108 -0
- data/ext/langscan/elisp/elisp/elisptok.l +151 -0
- data/ext/langscan/elisp/elisp/extconf.rb +3 -0
- data/ext/langscan/elisp/elisp/modulename.txt +1 -0
- data/ext/langscan/elisp/elisp/tokenlist.txt +9 -0
- data/ext/langscan/java/java/Makefile +188 -0
- data/ext/langscan/java/java/extconf.rb +3 -0
- data/ext/langscan/java/java/java.c +134 -0
- data/ext/langscan/java/java/java.h +64 -0
- data/ext/langscan/java/java/javatok.c +2097 -0
- data/ext/langscan/java/java/javatok.l +155 -0
- data/ext/langscan/java/java/modulename.txt +1 -0
- data/ext/langscan/java/java/tokenlist.txt +11 -0
- data/ext/langscan/javascript/javascript/Makefile +188 -0
- data/ext/langscan/javascript/javascript/extconf.rb +3 -0
- data/ext/langscan/javascript/javascript/javascript.c +134 -0
- data/ext/langscan/javascript/javascript/javascript.h +63 -0
- data/ext/langscan/javascript/javascript/javascripttok.c +2058 -0
- data/ext/langscan/javascript/javascript/javascripttok.l +147 -0
- data/ext/langscan/javascript/javascript/modulename.txt +1 -0
- data/ext/langscan/javascript/javascript/tokenlist.txt +10 -0
- data/ext/langscan/pairmatcher/pairmatcher/Makefile +188 -0
- data/ext/langscan/pairmatcher/pairmatcher/extconf.rb +3 -0
- data/ext/langscan/pairmatcher/pairmatcher/pairmatcher.c +890 -0
- data/ext/langscan/php/php/Makefile +188 -0
- data/ext/langscan/php/php/extconf.rb +3 -0
- data/ext/langscan/php/php/modulename.txt +1 -0
- data/ext/langscan/php/php/php.c +134 -0
- data/ext/langscan/php/php/php.h +64 -0
- data/ext/langscan/php/php/phptok.c +2413 -0
- data/ext/langscan/php/php/phptok.l +212 -0
- data/ext/langscan/php/php/tokenlist.txt +11 -0
- data/ext/langscan/post-distclean.rb +21 -0
- data/ext/langscan/pre-config.rb +57 -0
- data/ext/langscan/python/python/Makefile +188 -0
- data/ext/langscan/python/python/extconf.rb +3 -0
- data/ext/langscan/python/python/modulename.txt +1 -0
- data/ext/langscan/python/python/python.c +134 -0
- data/ext/langscan/python/python/python.h +61 -0
- data/ext/langscan/python/python/pythontok.c +2109 -0
- data/ext/langscan/python/python/pythontok.l +155 -0
- data/ext/langscan/python/python/tokenlist.txt +8 -0
- data/ext/langscan/ruby/compat/ripper/Makefile +189 -0
- data/ext/langscan/ruby/compat/ripper/depend +1 -0
- data/ext/langscan/ruby/compat/ripper/extconf.rb +4 -0
- data/ext/langscan/ruby/compat/ripper/include/eventids1.c +251 -0
- data/ext/langscan/ruby/compat/ripper/include/eventids2.c +277 -0
- data/ext/langscan/ruby/compat/ripper/include/lex.c +138 -0
- data/ext/langscan/ruby/compat/ripper/ripper.c +14420 -0
- data/ext/langscan/scheme/scheme/Makefile +188 -0
- data/ext/langscan/scheme/scheme/extconf.rb +3 -0
- data/ext/langscan/scheme/scheme/modulename.txt +1 -0
- data/ext/langscan/scheme/scheme/scheme.c +134 -0
- data/ext/langscan/scheme/scheme/scheme.h +60 -0
- data/ext/langscan/scheme/scheme/schemetok.c +2454 -0
- data/ext/langscan/scheme/scheme/schemetok.l +177 -0
- data/ext/langscan/scheme/scheme/tokenlist.txt +7 -0
- data/ext/langscan/sh/sh/Makefile +188 -0
- data/ext/langscan/sh/sh/extconf.rb +3 -0
- data/ext/langscan/sh/sh/modulename.txt +1 -0
- data/ext/langscan/sh/sh/sh.c +134 -0
- data/ext/langscan/sh/sh/sh.h +61 -0
- data/ext/langscan/sh/sh/shtok.c +2477 -0
- data/ext/langscan/sh/sh/shtok.l +325 -0
- data/ext/langscan/sh/sh/tokenlist.txt +8 -0
- data/lib/langscan.rb +124 -0
- data/lib/langscan/_common.rb +50 -0
- data/lib/langscan/_easyscanner.rb +78 -0
- data/lib/langscan/_pairmatcher.rb +46 -0
- data/lib/langscan/_type.rb +125 -0
- data/lib/langscan/autoconf.rb +51 -0
- data/lib/langscan/automake.rb +51 -0
- data/lib/langscan/brainfuck.rb +48 -0
- data/lib/langscan/c.rb +144 -0
- data/lib/langscan/c/c.so +0 -0
- data/lib/langscan/csharp.rb +101 -0
- data/lib/langscan/csharp/csharp.so +0 -0
- data/lib/langscan/css.rb +109 -0
- data/lib/langscan/d.rb +201 -0
- data/lib/langscan/d/d.so +0 -0
- data/lib/langscan/eiffel.rb +167 -0
- data/lib/langscan/elisp.rb +132 -0
- data/lib/langscan/elisp/elisp.so +0 -0
- data/lib/langscan/io.rb +84 -0
- data/lib/langscan/java.rb +95 -0
- data/lib/langscan/java/java.so +0 -0
- data/lib/langscan/javascript.rb +97 -0
- data/lib/langscan/javascript/javascript.so +0 -0
- data/lib/langscan/lua.rb +116 -0
- data/lib/langscan/ocaml.rb +298 -0
- data/lib/langscan/ocaml/camlexer.ml +28 -0
- data/lib/langscan/ocaml/lexer.mll +230 -0
- data/lib/langscan/ocaml/types.ml +36 -0
- data/lib/langscan/pairmatcher/pairmatcher.so +0 -0
- data/lib/langscan/perl.rb +87 -0
- data/lib/langscan/perl/tokenizer.pl +231 -0
- data/lib/langscan/php.rb +80 -0
- data/lib/langscan/php/php.so +0 -0
- data/lib/langscan/python.rb +101 -0
- data/lib/langscan/python/python.so +0 -0
- data/lib/langscan/rpmspec.rb +71 -0
- data/lib/langscan/ruby.rb +164 -0
- data/lib/langscan/ruby/compat/README +5 -0
- data/lib/langscan/ruby/compat/ripper.rb +4 -0
- data/lib/langscan/ruby/compat/ripper.so +0 -0
- data/lib/langscan/ruby/compat/ripper/core.rb +918 -0
- data/lib/langscan/ruby/compat/ripper/filter.rb +70 -0
- data/lib/langscan/ruby/compat/ripper/lexer.rb +179 -0
- data/lib/langscan/ruby/compat/ripper/sexp.rb +100 -0
- data/lib/langscan/scheme.rb +160 -0
- data/lib/langscan/scheme/scheme.so +0 -0
- data/lib/langscan/sh.rb +116 -0
- data/lib/langscan/sh/sh.so +0 -0
- data/lib/langscan/text.rb +37 -0
- data/metaconfig +2 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/makemanifest.rb +21 -0
- data/setup.rb +1604 -0
- data/tasks/extconf.rake +13 -0
- data/tasks/extconf/langscan.rake +42 -0
- data/test/langscan/brainfuck/test/test_scan.rb +55 -0
- data/test/langscan/c/test/test_scan.rb +216 -0
- data/test/langscan/c/test/test_token.rb +41 -0
- data/test/langscan/csharp/test/test_scan.rb +157 -0
- data/test/langscan/css/test/test_css.rb +79 -0
- data/test/langscan/d/test/test_scan.rb +233 -0
- data/test/langscan/d/test/test_token.rb +205 -0
- data/test/langscan/eiffel/test/test_eiffel.rb +95 -0
- data/test/langscan/elisp/test/test_elisp.rb +177 -0
- data/test/langscan/io/test/test_io.rb +79 -0
- data/test/langscan/java/test/test_java.rb +74 -0
- data/test/langscan/javascript/test/test_javascript.rb +39 -0
- data/test/langscan/lua/test/test_lua.rb +69 -0
- data/test/langscan/ocaml/test/test_ocaml.rb +161 -0
- data/test/langscan/php/test/test_scan.rb +138 -0
- data/test/langscan/python/test/test_scan.rb +105 -0
- data/test/langscan/rpmspec/test/test_rpmspec.rb +51 -0
- data/test/langscan/ruby/test/test_scan.rb +71 -0
- data/test/langscan/scheme/test/test_scan.rb +198 -0
- data/test/test_helper.rb +7 -0
- data/test/test_langscan.rb +123 -0
- metadata +320 -0
@@ -0,0 +1,325 @@
|
|
1
|
+
/*
|
2
|
+
* shtok.l - a lex rule for shell scripts
|
3
|
+
*
|
4
|
+
* Copyright (C) 2005 Kenichi Ishibashi <bashi at dream.ie.ariake-nct.ac.jp>
|
5
|
+
* All rights reserved.
|
6
|
+
* This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
*
|
8
|
+
* You can redistribute it and/or modify it under the terms of
|
9
|
+
* the GNU General Public License version 2.
|
10
|
+
*/
|
11
|
+
|
12
|
+
%option reentrant
|
13
|
+
%option prefix="langscan_sh_lex_"
|
14
|
+
%option noyywrap
|
15
|
+
%option nodefault
|
16
|
+
%option stack
|
17
|
+
%s DQUOTE
|
18
|
+
%s BQUOTE
|
19
|
+
%s BRACE_SUBST
|
20
|
+
%s PAREN_SUBST
|
21
|
+
%s IN_BRACE
|
22
|
+
%s HEREDOC_DELIMITER
|
23
|
+
%s HEREDOC
|
24
|
+
|
25
|
+
space [ \t]+
|
26
|
+
newline \r\n|\r|\n
|
27
|
+
escseq \\({newline}|.)
|
28
|
+
ident [0-9A-Za-z_][0-9A-Za-z_\-\.]*
|
29
|
+
squote \'[^\']*\'
|
30
|
+
specialvar (\$|\#|\*|@|\?|\-|\!|\_)
|
31
|
+
var_ident ([A-Za-z_][0-9A-Za-z_]*|[0-9]|{specialvar})
|
32
|
+
|
33
|
+
%{
|
34
|
+
|
35
|
+
#include "sh.h"
|
36
|
+
|
37
|
+
#define YY_EXTRA_TYPE langscan_sh_lex_extra_t *
|
38
|
+
|
39
|
+
#if YY_NULL != 0
|
40
|
+
#error "YY_NULL is not 0."
|
41
|
+
#endif
|
42
|
+
|
43
|
+
#define YY_DECL langscan_sh_token_t langscan_sh_lex_lex(yyscan_t yyscanner)
|
44
|
+
|
45
|
+
#define YY_INPUT(buf,result,max_size) \
|
46
|
+
if (!yyextra->eof) { \
|
47
|
+
result = yyextra->user_read(&(yyextra->user_data), (buf), (max_size)); \
|
48
|
+
if (result == 0) \
|
49
|
+
yyextra->eof = 1; \
|
50
|
+
}
|
51
|
+
|
52
|
+
#define UPD update_pos(yyextra, yytext, yyleng)
|
53
|
+
static void update_pos(langscan_sh_lex_extra_t *, char *, int);
|
54
|
+
|
55
|
+
#define report(token) \
|
56
|
+
do { \
|
57
|
+
yyextra->text = yytext; \
|
58
|
+
yyextra->leng = yyleng; \
|
59
|
+
return langscan_sh_##token; \
|
60
|
+
} while (0)
|
61
|
+
|
62
|
+
#define PUSH_STATE(state) yy_push_state(state, yyscanner)
|
63
|
+
#define POP_STATE yy_pop_state(yyscanner)
|
64
|
+
|
65
|
+
static int ident_length(unsigned char *ptr, int max);
|
66
|
+
|
67
|
+
static char *heredoc_delimiter;
|
68
|
+
static enum { HEREDOC_TAB_NO_STRIP, HEREDOC_TAB_STRIP } heredoc_type;
|
69
|
+
static int set_heredoc_delimiter(unsigned char *ptr, int max);
|
70
|
+
|
71
|
+
%}
|
72
|
+
|
73
|
+
%%
|
74
|
+
<INITIAL,IN_BRACE>\<\<\-? {
|
75
|
+
if (yytext[yyleng - 1] == '-')
|
76
|
+
heredoc_type = HEREDOC_TAB_STRIP;
|
77
|
+
else
|
78
|
+
heredoc_type = HEREDOC_TAB_NO_STRIP;
|
79
|
+
UPD;
|
80
|
+
PUSH_STATE(HEREDOC_DELIMITER);
|
81
|
+
report(punct);
|
82
|
+
}
|
83
|
+
<HEREDOC_DELIMITER>[^ \t\r\n].* {
|
84
|
+
int delimiter_leng;
|
85
|
+
delimiter_leng = set_heredoc_delimiter(yytext, yyleng);
|
86
|
+
if (delimiter_leng == -1) YY_FATAL_ERROR("Can't allocate memory");
|
87
|
+
yyless(delimiter_leng);
|
88
|
+
PUSH_STATE(HEREDOC);
|
89
|
+
UPD;
|
90
|
+
report(heredoc_beg);
|
91
|
+
}
|
92
|
+
<HEREDOC>^.+ {
|
93
|
+
int sleng;
|
94
|
+
sleng = 0;
|
95
|
+
if (heredoc_type == HEREDOC_TAB_STRIP) {
|
96
|
+
while (yytext[sleng] == ' ' || yytext[sleng] =='\t') {
|
97
|
+
sleng++;
|
98
|
+
if (sleng >= yyleng) { UPD; report(space); }
|
99
|
+
}
|
100
|
+
}
|
101
|
+
if (strcmp((yytext + sleng), heredoc_delimiter) == 0) { /* end-of-heredoc */
|
102
|
+
free(heredoc_delimiter);
|
103
|
+
POP_STATE;
|
104
|
+
POP_STATE;
|
105
|
+
UPD; report(heredoc_end);
|
106
|
+
}
|
107
|
+
else {
|
108
|
+
UPD; report(string);
|
109
|
+
}
|
110
|
+
}
|
111
|
+
<HEREDOC><<EOF>> {
|
112
|
+
free(heredoc_delimiter);
|
113
|
+
BEGIN(INITIAL);
|
114
|
+
UPD; report(string);
|
115
|
+
}
|
116
|
+
|
117
|
+
|
118
|
+
<INITIAL,BQUOTE,BRACE_SUBST,PAREN_SUBST,IN_BRACE>\" {
|
119
|
+
PUSH_STATE(DQUOTE);
|
120
|
+
UPD; report(punct);
|
121
|
+
}
|
122
|
+
<DQUOTE>\" {
|
123
|
+
POP_STATE;
|
124
|
+
UPD; report(punct);
|
125
|
+
}
|
126
|
+
<DQUOTE><<EOF>> { BEGIN(INITIAL); }
|
127
|
+
<DQUOTE>([^\"\`\$\\]|{escseq})+ { UPD; report(string); }
|
128
|
+
|
129
|
+
|
130
|
+
<INITIAL,DQUOTE,BRACE_SUBST,PAREN_SUBST,IN_BRACE>\` {
|
131
|
+
PUSH_STATE(BQUOTE);
|
132
|
+
UPD; report(punct);
|
133
|
+
}
|
134
|
+
<BQUOTE>\` {
|
135
|
+
POP_STATE;
|
136
|
+
UPD; report(punct);
|
137
|
+
}
|
138
|
+
<BQUOTE><<EOF>> { BEGIN(INITIAL); }
|
139
|
+
|
140
|
+
|
141
|
+
<INITIAL,DQUOTE,BQUOTE,PAREN_SUBST,IN_BRACE>\$\{ {
|
142
|
+
PUSH_STATE(BRACE_SUBST);
|
143
|
+
UPD; report(punct);
|
144
|
+
}
|
145
|
+
<BRACE_SUBST>\} {
|
146
|
+
POP_STATE;
|
147
|
+
UPD; report(punct);
|
148
|
+
}
|
149
|
+
<BRACE_SUBST>{var_ident} { UPD; report(ident); }
|
150
|
+
<BRACE_SUBST><<EOF>> { BEGIN(INITIAL); }
|
151
|
+
|
152
|
+
|
153
|
+
\{ {
|
154
|
+
PUSH_STATE(IN_BRACE);
|
155
|
+
UPD; report(punct);
|
156
|
+
}
|
157
|
+
<IN_BRACE>\} {
|
158
|
+
POP_STATE;
|
159
|
+
UPD; report(punct);
|
160
|
+
}
|
161
|
+
<IN_BRACE><<EOF>> { BEGIN(INITIAL); }
|
162
|
+
|
163
|
+
|
164
|
+
\$?\( {
|
165
|
+
PUSH_STATE(PAREN_SUBST);
|
166
|
+
UPD; report(punct);
|
167
|
+
}
|
168
|
+
<PAREN_SUBST>\) {
|
169
|
+
POP_STATE;
|
170
|
+
UPD; report(punct);
|
171
|
+
}
|
172
|
+
<PAREN_SUBST><<EOF>> { BEGIN(INITIAL); }
|
173
|
+
|
174
|
+
|
175
|
+
<INITIAL,BQUOTE,PAREN_SUBST,IN_BRACE>^\#.* { UPD; report(comment); }
|
176
|
+
<INITIAL,BQUOTE,PAREN_SUBST,IN_BRACE>{space}\#.* { UPD; report(comment); }
|
177
|
+
<INITIAL,BQUOTE,BRACE_SUBST,PAREN_SUBST,IN_BRACE>{squote} { UPD; report(string); }
|
178
|
+
|
179
|
+
{space} { UPD; report(space); }
|
180
|
+
{newline} { UPD; report(space); }
|
181
|
+
{ident}[ \t]*\([ \t]*\) { yyless(ident_length(yytext, yyleng)); UPD; report(fundef); }
|
182
|
+
{ident} { UPD; report(ident); }
|
183
|
+
\${var_ident} { UPD; report(ident); }
|
184
|
+
\\. { UPD; report(punct); }
|
185
|
+
>=|<=|!=|\;\;|\<\<\<|&&|\|\||>&|<& { UPD; report(punct); }
|
186
|
+
. { UPD; report(punct); }
|
187
|
+
|
188
|
+
%%
|
189
|
+
|
190
|
+
static void update_pos(
|
191
|
+
langscan_sh_lex_extra_t *extra,
|
192
|
+
char *text,
|
193
|
+
int leng)
|
194
|
+
{
|
195
|
+
int i, j;
|
196
|
+
extra->beg_byteno = extra->end_byteno;
|
197
|
+
extra->beg_lineno = extra->end_lineno;
|
198
|
+
extra->beg_columnno = extra->end_columnno;
|
199
|
+
j = 0;
|
200
|
+
for (i = 0; i < leng; i++) {
|
201
|
+
if (text[i] == '\n') {
|
202
|
+
extra->end_lineno++;
|
203
|
+
j = i + 1;
|
204
|
+
extra->end_columnno = 0;
|
205
|
+
}
|
206
|
+
}
|
207
|
+
extra->end_columnno += leng - j;
|
208
|
+
extra->end_byteno += leng;
|
209
|
+
}
|
210
|
+
|
211
|
+
/*
 * Return the length of the leading run of identifier characters in `ptr`,
 * looking at no more than `max` bytes.  Identifier characters are ASCII
 * digits, ASCII letters, '_', '-' and '.'.
 */
static int ident_length(unsigned char *ptr, int max)
{
    int n;

    for (n = 0; n < max; n++) {
        unsigned char c = ptr[n];
        int digit = (c >= '0' && c <= '9');
        int upper = (c >= 'A' && c <= 'Z');
        int lower = (c >= 'a' && c <= 'z');
        int extra = (c == '_' || c == '-' || c == '.');

        if (!(digit || upper || lower || extra))
            break;
    }
    return n;
}
|
225
|
+
|
226
|
+
static int set_heredoc_delimiter(unsigned char *ptr, int max)
|
227
|
+
{
|
228
|
+
char *dst, quote_char;
|
229
|
+
int in_quote, len;
|
230
|
+
heredoc_delimiter = malloc(max + 1);
|
231
|
+
if (heredoc_delimiter == NULL) return -1;
|
232
|
+
dst = heredoc_delimiter;
|
233
|
+
len = 0;
|
234
|
+
in_quote = 0;
|
235
|
+
while (len < max) {
|
236
|
+
if (in_quote == 0) { /* unquoted delimiter */
|
237
|
+
if (*ptr == '\'' || *ptr == '\"') {
|
238
|
+
quote_char = *ptr;
|
239
|
+
in_quote = 1;
|
240
|
+
ptr++;
|
241
|
+
if (++len >= max) break;
|
242
|
+
continue;
|
243
|
+
}
|
244
|
+
if (*ptr == ' ' || *ptr == '\t') break;
|
245
|
+
if (*ptr == '\\') {
|
246
|
+
ptr++;
|
247
|
+
if (++len >= max) break;
|
248
|
+
}
|
249
|
+
}
|
250
|
+
else { /* quoted delimiter */
|
251
|
+
if (*ptr == quote_char) {
|
252
|
+
in_quote = 0;
|
253
|
+
ptr++;
|
254
|
+
if (len++ >= max) break;
|
255
|
+
continue;
|
256
|
+
}
|
257
|
+
}
|
258
|
+
*dst++ = *ptr++;
|
259
|
+
len++;
|
260
|
+
}
|
261
|
+
*dst = '\0';
|
262
|
+
return len;
|
263
|
+
}
|
264
|
+
|
265
|
+
langscan_sh_tokenizer_t *langscan_sh_make_tokenizer(
|
266
|
+
size_t (*user_read)(void **user_data_p, char *buf, size_t maxlen),
|
267
|
+
void *user_data)
|
268
|
+
{
|
269
|
+
langscan_sh_tokenizer_t *tokenizer;
|
270
|
+
langscan_sh_lex_extra_t *extra;
|
271
|
+
tokenizer = (langscan_sh_tokenizer_t *)malloc(sizeof(langscan_sh_tokenizer_t));
|
272
|
+
if (tokenizer == NULL)
|
273
|
+
return NULL;
|
274
|
+
extra = (langscan_sh_lex_extra_t *)malloc(sizeof(langscan_sh_lex_extra_t));
|
275
|
+
if (extra == NULL)
|
276
|
+
return NULL;
|
277
|
+
extra->user_read = user_read;
|
278
|
+
extra->user_data = user_data;
|
279
|
+
extra->beg_lineno = 1;
|
280
|
+
extra->beg_columnno = 0;
|
281
|
+
extra->beg_byteno = 0;
|
282
|
+
extra->end_lineno = 1;
|
283
|
+
extra->end_columnno = 0;
|
284
|
+
extra->end_byteno = 0;
|
285
|
+
extra->eof = 0;
|
286
|
+
tokenizer->extra = extra;
|
287
|
+
langscan_sh_lex_lex_init(&tokenizer->scanner);
|
288
|
+
langscan_sh_lex_set_extra(extra, tokenizer->scanner);
|
289
|
+
return tokenizer;
|
290
|
+
}
|
291
|
+
|
292
|
+
/*
 * Run the scanner owned by `tokenizer` once and return the token it reports.
 */
langscan_sh_token_t langscan_sh_get_token(langscan_sh_tokenizer_t *tokenizer)
{
    langscan_sh_token_t next;

    next = langscan_sh_lex_lex(tokenizer->scanner);
    return next;
}
|
296
|
+
|
297
|
+
/*
 * Release a tokenizer: its scanner-attached extra state, the scanner itself,
 * and finally the tokenizer structure.
 */
void langscan_sh_free_tokenizer(langscan_sh_tokenizer_t *tokenizer)
{
    /* the extra state is looked up through the scanner, so free it before
       the scanner is destroyed */
    free((void *)langscan_sh_lex_get_extra(tokenizer->scanner));
    langscan_sh_lex_lex_destroy(tokenizer->scanner);
    free((void *)tokenizer);
}
|
304
|
+
|
305
|
+
/*
 * Return the read callback stored in the tokenizer's extra state.
 */
user_read_t langscan_sh_tokenizer_get_user_read(langscan_sh_tokenizer_t *tokenizer)
{
    langscan_sh_lex_extra_t *state = tokenizer->extra;

    return state->user_read;
}
|
309
|
+
|
310
|
+
/*
 * Return the user data pointer stored in the tokenizer's extra state.
 */
void *langscan_sh_tokenizer_get_user_data(langscan_sh_tokenizer_t *tokenizer)
{
    langscan_sh_lex_extra_t *state = tokenizer->extra;

    return state->user_data;
}
|
314
|
+
|
315
|
+
/*
 * Return the printable name of `token`.
 *
 * Index 0 is "*eof*"; the remaining names are generated by stringifying each
 * entry of LANGSCAN_SH_TOKEN_LIST, in list order.  A value outside the table
 * yields "*unknown*" instead of reading past the end of the array.
 */
const char *langscan_sh_token_name(langscan_sh_token_t token)
{
    static const char *const token_names[] = {
        "*eof*",
#define LANGSCAN_SH_TOKEN(name) #name,
        LANGSCAN_SH_TOKEN_LIST
#undef LANGSCAN_SH_TOKEN
    };
    const size_t count = sizeof(token_names) / sizeof(token_names[0]);

    /* the cast also maps negative values to out-of-range indexes */
    if ((size_t)token >= count)
        return "*unknown*";
    return token_names[token];
}
|
data/lib/langscan.rb
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
#
|
2
|
+
# langscan.rb - an interface module of LangScan
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
module LangScan
  VERSION = "1.2"

  # Maps a file extension, in the form File.extname returns it, to the
  # scanner module registered for that extension.
  LangScanRegistry = {}

  module_function

  # Require the *.rb files found in a _plugin_path_ directory under each
  # $LOAD_PATH entry. Files whose name starts with "_" are skipped.
  def load_plugins(plugin_path)
    $LOAD_PATH.each do |path|
      candidate_path = File.join(path, plugin_path)
      next unless File.directory?(candidate_path)

      Dir.entries(candidate_path).each do |entry|
        next unless File.extname(entry) == ".rb"
        next if /\A_/.match(entry)

        begin
          require(File.join(plugin_path, entry))
        rescue LoadError
          # Best effort: a plugin that fails to load is skipped.
        end
      end
    end
  end

  # Load the LangScan scanner modules.
  def load
    load_plugins("langscan")
  end

  # Validate that _mod_ is a LangScan module: it must respond to +name+,
  # +abbrev+ and +scan+, and its abbreviation must consist solely of
  # lowercase ASCII letters.
  #
  # Raises RuntimeError when a check fails.
  def validate_module(mod)
    common_methods = [:name, :abbrev, :scan]
    common_methods.each do |method|
      raise "#{mod.to_s} lacks #{method}" unless mod.respond_to?(method)
    end
    # \A and \z anchor the whole string; ^ and $ would accept any string
    # that merely contains one all-lowercase line.
    unless /\A[a-z]+\z/.match(mod.abbrev)
      raise "#{mod.to_s} invalid abbreviation: #{mod.abbrev}"
    end
  end

  # Register a new LangScan module _mod_ for each of its +extnames+.
  #
  # Raises RuntimeError if _mod_ is not a valid LangScan module or if one of
  # its extensions is already registered.
  def register(mod)
    validate_module(mod)
    mod.extnames.each do |extname|
      if LangScanRegistry.include?(extname)
        owner = LangScanRegistry[extname]
        raise "#{extname} is already used by #{owner.abbrev}"
      end
      LangScanRegistry[extname] = mod
    end
  end

  # Return an array containing the registered LangScan modules, without
  # duplicates.
  def modules
    LangScanRegistry.values.uniq
  end

  # Return the LangScan module chosen by the shebang line of _content_:
  # the first registered scanner whose abbreviation appears, at a word
  # boundary and ignoring case, on a first line starting with "#!".
  # Return +nil+ if no suitable LangScan module is found.
  def choose_by_shebang(content)
    first_line = ""
    content.each_line do |line|
      first_line = line
      break
    end
    LangScanRegistry.each_value do |scanner|
      regexp = /^#!.*\b#{Regexp.quote(scanner.abbrev)}/i
      return scanner if regexp.match(first_line)
    end
    nil
  end

  # Return the LangScan module chosen by an Emacs mode line.
  # If _content_ contains "-*- mode: c -*-", +choose_by_emacs_mode+ returns
  # LangScan::C.
  # Only the beginning of _content_ is inspected.
  # Return +nil+ if no suitable LangScan module is found.
  def choose_by_emacs_mode(content)
    chunk = content[0, 512] # FIXME: magic number
    LangScanRegistry.each_value do |scanner|
      candidates = [scanner.name.downcase.gsub(/\s+/, "-")]
      # A name such as "C/C++" also matches each of its parts on its own.
      candidates.concat(scanner.name.split("/")) if scanner.name.include?("/")
      mode = candidates.map {|candidate| Regexp.quote(candidate) }.join("|")
      regexp = /-\*-\s+mode:\s+(?:#{mode})\s+-\*-/i
      return scanner if regexp.match(chunk)
    end
    nil
  end

  # Return the LangScan module chosen by _content_.
  # +choose_by_content+ tries +choose_by_shebang+ first and then
  # +choose_by_emacs_mode+.
  def choose_by_content(content)
    choose_by_shebang(content) || choose_by_emacs_mode(content)
  end

  # Return the LangScan module suitable for _file_name_. The file extension
  # is tried first; when it is not registered and _content_ is given, the
  # choice falls back to +choose_by_content+. Return +nil+ if nothing matches.
  def choose(file_name, content = nil)
    extname = File.extname(file_name)
    scanner = LangScanRegistry[extname]
    scanner = choose_by_content(content) if scanner.nil? && content
    scanner
  end

  # Return true if the extension of _file_name_ is registered with LangScan.
  def support?(file_name)
    extname = File.extname(file_name)
    LangScanRegistry.include?(extname)
  end
end
|
123
|
+
|
124
|
+
# Load the scanner plugins from every "langscan" directory on $LOAD_PATH as
# soon as this file is required.
LangScan.load
|