langscan 1.2-x86-mswin32-60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. data/AUTHORS.txt +19 -0
  2. data/History.txt +126 -0
  3. data/Manifest.txt +167 -0
  4. data/README.rdoc +91 -0
  5. data/Rakefile +40 -0
  6. data/ext/langscan/_make_c.rb +20 -0
  7. data/ext/langscan/_make_h.rb +30 -0
  8. data/ext/langscan/_template.c +134 -0
  9. data/ext/langscan/_template.h +53 -0
  10. data/ext/langscan/c/c/Makefile +188 -0
  11. data/ext/langscan/c/c/c.c +134 -0
  12. data/ext/langscan/c/c/c.h +66 -0
  13. data/ext/langscan/c/c/ctok.c +4629 -0
  14. data/ext/langscan/c/c/ctok.l +212 -0
  15. data/ext/langscan/c/c/extconf.rb +3 -0
  16. data/ext/langscan/c/c/modulename.txt +1 -0
  17. data/ext/langscan/c/c/tokenlist.txt +13 -0
  18. data/ext/langscan/csharp/csharp/Makefile +188 -0
  19. data/ext/langscan/csharp/csharp/csharp.c +134 -0
  20. data/ext/langscan/csharp/csharp/csharp.h +65 -0
  21. data/ext/langscan/csharp/csharp/csharptok.c +2971 -0
  22. data/ext/langscan/csharp/csharp/csharptok.l +200 -0
  23. data/ext/langscan/csharp/csharp/extconf.rb +3 -0
  24. data/ext/langscan/csharp/csharp/modulename.txt +1 -0
  25. data/ext/langscan/csharp/csharp/tokenlist.txt +12 -0
  26. data/ext/langscan/d/d/Makefile +188 -0
  27. data/ext/langscan/d/d/d.c +134 -0
  28. data/ext/langscan/d/d/d.h +64 -0
  29. data/ext/langscan/d/d/dtok.c +5468 -0
  30. data/ext/langscan/d/d/dtok.l +282 -0
  31. data/ext/langscan/d/d/extconf.rb +3 -0
  32. data/ext/langscan/d/d/modulename.txt +1 -0
  33. data/ext/langscan/d/d/tokenlist.txt +11 -0
  34. data/ext/langscan/elisp/elisp/Makefile +188 -0
  35. data/ext/langscan/elisp/elisp/elisp.c +134 -0
  36. data/ext/langscan/elisp/elisp/elisp.h +62 -0
  37. data/ext/langscan/elisp/elisp/elisptok.c +2108 -0
  38. data/ext/langscan/elisp/elisp/elisptok.l +151 -0
  39. data/ext/langscan/elisp/elisp/extconf.rb +3 -0
  40. data/ext/langscan/elisp/elisp/modulename.txt +1 -0
  41. data/ext/langscan/elisp/elisp/tokenlist.txt +9 -0
  42. data/ext/langscan/java/java/Makefile +188 -0
  43. data/ext/langscan/java/java/extconf.rb +3 -0
  44. data/ext/langscan/java/java/java.c +134 -0
  45. data/ext/langscan/java/java/java.h +64 -0
  46. data/ext/langscan/java/java/javatok.c +2097 -0
  47. data/ext/langscan/java/java/javatok.l +155 -0
  48. data/ext/langscan/java/java/modulename.txt +1 -0
  49. data/ext/langscan/java/java/tokenlist.txt +11 -0
  50. data/ext/langscan/javascript/javascript/Makefile +188 -0
  51. data/ext/langscan/javascript/javascript/extconf.rb +3 -0
  52. data/ext/langscan/javascript/javascript/javascript.c +134 -0
  53. data/ext/langscan/javascript/javascript/javascript.h +63 -0
  54. data/ext/langscan/javascript/javascript/javascripttok.c +2058 -0
  55. data/ext/langscan/javascript/javascript/javascripttok.l +147 -0
  56. data/ext/langscan/javascript/javascript/modulename.txt +1 -0
  57. data/ext/langscan/javascript/javascript/tokenlist.txt +10 -0
  58. data/ext/langscan/pairmatcher/pairmatcher/Makefile +188 -0
  59. data/ext/langscan/pairmatcher/pairmatcher/extconf.rb +3 -0
  60. data/ext/langscan/pairmatcher/pairmatcher/pairmatcher.c +890 -0
  61. data/ext/langscan/php/php/Makefile +188 -0
  62. data/ext/langscan/php/php/extconf.rb +3 -0
  63. data/ext/langscan/php/php/modulename.txt +1 -0
  64. data/ext/langscan/php/php/php.c +134 -0
  65. data/ext/langscan/php/php/php.h +64 -0
  66. data/ext/langscan/php/php/phptok.c +2413 -0
  67. data/ext/langscan/php/php/phptok.l +212 -0
  68. data/ext/langscan/php/php/tokenlist.txt +11 -0
  69. data/ext/langscan/post-distclean.rb +21 -0
  70. data/ext/langscan/pre-config.rb +57 -0
  71. data/ext/langscan/python/python/Makefile +188 -0
  72. data/ext/langscan/python/python/extconf.rb +3 -0
  73. data/ext/langscan/python/python/modulename.txt +1 -0
  74. data/ext/langscan/python/python/python.c +134 -0
  75. data/ext/langscan/python/python/python.h +61 -0
  76. data/ext/langscan/python/python/pythontok.c +2109 -0
  77. data/ext/langscan/python/python/pythontok.l +155 -0
  78. data/ext/langscan/python/python/tokenlist.txt +8 -0
  79. data/ext/langscan/ruby/compat/ripper/Makefile +189 -0
  80. data/ext/langscan/ruby/compat/ripper/depend +1 -0
  81. data/ext/langscan/ruby/compat/ripper/extconf.rb +4 -0
  82. data/ext/langscan/ruby/compat/ripper/include/eventids1.c +251 -0
  83. data/ext/langscan/ruby/compat/ripper/include/eventids2.c +277 -0
  84. data/ext/langscan/ruby/compat/ripper/include/lex.c +138 -0
  85. data/ext/langscan/ruby/compat/ripper/ripper.c +14420 -0
  86. data/ext/langscan/scheme/scheme/Makefile +188 -0
  87. data/ext/langscan/scheme/scheme/extconf.rb +3 -0
  88. data/ext/langscan/scheme/scheme/modulename.txt +1 -0
  89. data/ext/langscan/scheme/scheme/scheme.c +134 -0
  90. data/ext/langscan/scheme/scheme/scheme.h +60 -0
  91. data/ext/langscan/scheme/scheme/schemetok.c +2454 -0
  92. data/ext/langscan/scheme/scheme/schemetok.l +177 -0
  93. data/ext/langscan/scheme/scheme/tokenlist.txt +7 -0
  94. data/ext/langscan/sh/sh/Makefile +188 -0
  95. data/ext/langscan/sh/sh/extconf.rb +3 -0
  96. data/ext/langscan/sh/sh/modulename.txt +1 -0
  97. data/ext/langscan/sh/sh/sh.c +134 -0
  98. data/ext/langscan/sh/sh/sh.h +61 -0
  99. data/ext/langscan/sh/sh/shtok.c +2477 -0
  100. data/ext/langscan/sh/sh/shtok.l +325 -0
  101. data/ext/langscan/sh/sh/tokenlist.txt +8 -0
  102. data/lib/langscan.rb +124 -0
  103. data/lib/langscan/_common.rb +50 -0
  104. data/lib/langscan/_easyscanner.rb +78 -0
  105. data/lib/langscan/_pairmatcher.rb +46 -0
  106. data/lib/langscan/_type.rb +125 -0
  107. data/lib/langscan/autoconf.rb +51 -0
  108. data/lib/langscan/automake.rb +51 -0
  109. data/lib/langscan/brainfuck.rb +48 -0
  110. data/lib/langscan/c.rb +144 -0
  111. data/lib/langscan/c/c.so +0 -0
  112. data/lib/langscan/csharp.rb +101 -0
  113. data/lib/langscan/csharp/csharp.so +0 -0
  114. data/lib/langscan/css.rb +109 -0
  115. data/lib/langscan/d.rb +201 -0
  116. data/lib/langscan/d/d.so +0 -0
  117. data/lib/langscan/eiffel.rb +167 -0
  118. data/lib/langscan/elisp.rb +132 -0
  119. data/lib/langscan/elisp/elisp.so +0 -0
  120. data/lib/langscan/io.rb +84 -0
  121. data/lib/langscan/java.rb +95 -0
  122. data/lib/langscan/java/java.so +0 -0
  123. data/lib/langscan/javascript.rb +97 -0
  124. data/lib/langscan/javascript/javascript.so +0 -0
  125. data/lib/langscan/lua.rb +116 -0
  126. data/lib/langscan/ocaml.rb +298 -0
  127. data/lib/langscan/ocaml/camlexer.ml +28 -0
  128. data/lib/langscan/ocaml/lexer.mll +230 -0
  129. data/lib/langscan/ocaml/types.ml +36 -0
  130. data/lib/langscan/pairmatcher/pairmatcher.so +0 -0
  131. data/lib/langscan/perl.rb +87 -0
  132. data/lib/langscan/perl/tokenizer.pl +231 -0
  133. data/lib/langscan/php.rb +80 -0
  134. data/lib/langscan/php/php.so +0 -0
  135. data/lib/langscan/python.rb +101 -0
  136. data/lib/langscan/python/python.so +0 -0
  137. data/lib/langscan/rpmspec.rb +71 -0
  138. data/lib/langscan/ruby.rb +164 -0
  139. data/lib/langscan/ruby/compat/README +5 -0
  140. data/lib/langscan/ruby/compat/ripper.rb +4 -0
  141. data/lib/langscan/ruby/compat/ripper.so +0 -0
  142. data/lib/langscan/ruby/compat/ripper/core.rb +918 -0
  143. data/lib/langscan/ruby/compat/ripper/filter.rb +70 -0
  144. data/lib/langscan/ruby/compat/ripper/lexer.rb +179 -0
  145. data/lib/langscan/ruby/compat/ripper/sexp.rb +100 -0
  146. data/lib/langscan/scheme.rb +160 -0
  147. data/lib/langscan/scheme/scheme.so +0 -0
  148. data/lib/langscan/sh.rb +116 -0
  149. data/lib/langscan/sh/sh.so +0 -0
  150. data/lib/langscan/text.rb +37 -0
  151. data/metaconfig +2 -0
  152. data/script/console +10 -0
  153. data/script/destroy +14 -0
  154. data/script/generate +14 -0
  155. data/script/makemanifest.rb +21 -0
  156. data/setup.rb +1604 -0
  157. data/tasks/extconf.rake +13 -0
  158. data/tasks/extconf/langscan.rake +42 -0
  159. data/test/langscan/brainfuck/test/test_scan.rb +55 -0
  160. data/test/langscan/c/test/test_scan.rb +216 -0
  161. data/test/langscan/c/test/test_token.rb +41 -0
  162. data/test/langscan/csharp/test/test_scan.rb +157 -0
  163. data/test/langscan/css/test/test_css.rb +79 -0
  164. data/test/langscan/d/test/test_scan.rb +233 -0
  165. data/test/langscan/d/test/test_token.rb +205 -0
  166. data/test/langscan/eiffel/test/test_eiffel.rb +95 -0
  167. data/test/langscan/elisp/test/test_elisp.rb +177 -0
  168. data/test/langscan/io/test/test_io.rb +79 -0
  169. data/test/langscan/java/test/test_java.rb +74 -0
  170. data/test/langscan/javascript/test/test_javascript.rb +39 -0
  171. data/test/langscan/lua/test/test_lua.rb +69 -0
  172. data/test/langscan/ocaml/test/test_ocaml.rb +161 -0
  173. data/test/langscan/php/test/test_scan.rb +138 -0
  174. data/test/langscan/python/test/test_scan.rb +105 -0
  175. data/test/langscan/rpmspec/test/test_rpmspec.rb +51 -0
  176. data/test/langscan/ruby/test/test_scan.rb +71 -0
  177. data/test/langscan/scheme/test/test_scan.rb +198 -0
  178. data/test/test_helper.rb +7 -0
  179. data/test/test_langscan.rb +123 -0
  180. metadata +320 -0
@@ -0,0 +1,325 @@
1
+ /*
2
+ * shtok.l - a lex rule for shell scripts
3
+ *
4
+ * Copyright (C) 2005 Kenichi Ishibashi <bashi at dream.ie.ariake-nct.ac.jp>
5
+ * All rights reserved.
6
+ * This is free software with ABSOLUTELY NO WARRANTY.
7
+ *
8
+ * You can redistribute it and/or modify it under the terms of
9
+ * the GNU General Public License version 2.
10
+ */
11
+
12
+ %option reentrant
13
+ %option prefix="langscan_sh_lex_"
14
+ %option noyywrap
15
+ %option nodefault
16
+ %option stack
17
+ %s DQUOTE
18
+ %s BQUOTE
19
+ %s BRACE_SUBST
20
+ %s PAREN_SUBST
21
+ %s IN_BRACE
22
+ %s HEREDOC_DELIMITER
23
+ %s HEREDOC
24
+
25
+ space [ \t]+
26
+ newline \r\n|\r|\n
27
+ escseq \\({newline}|.)
28
+ ident [0-9A-Za-z_][0-9A-Za-z_\-\.]*
29
+ squote \'[^\']*\'
30
+ specialvar (\$|\#|\*|@|\?|\-|\!|\_)
31
+ var_ident ([A-Za-z_][0-9A-Za-z_]*|[0-9]|{specialvar})
32
+
33
+ %{
34
+
35
+ #include "sh.h"
36
+
37
+ #define YY_EXTRA_TYPE langscan_sh_lex_extra_t *
38
+
39
+ #if YY_NULL != 0
40
+ #error "YY_NULL is not 0."
41
+ #endif
42
+
43
+ #define YY_DECL langscan_sh_token_t langscan_sh_lex_lex(yyscan_t yyscanner)
44
+
45
/*
 * Refill flex's buffer through the user-supplied read callback.
 *
 * The callback's return value is stored in `result`; a return of 0 is
 * remembered in yyextra->eof so the callback is not invoked again.
 * Once eof is set, `result` is explicitly set to YY_NULL (checked to be 0
 * above) so that flex always sees a defined byte count instead of whatever
 * value the variable happened to hold.
 */
#define YY_INPUT(buf,result,max_size) \
    if (!yyextra->eof) { \
        result = yyextra->user_read(&(yyextra->user_data), (buf), (max_size)); \
        if (result == 0) \
            yyextra->eof = 1; \
    } \
    else { \
        result = YY_NULL; \
    }
51
+
52
+ #define UPD update_pos(yyextra, yytext, yyleng)
53
+ static void update_pos(langscan_sh_lex_extra_t *, char *, int);
54
+
55
+ #define report(token) \
56
+ do { \
57
+ yyextra->text = yytext; \
58
+ yyextra->leng = yyleng; \
59
+ return langscan_sh_##token; \
60
+ } while (0)
61
+
62
+ #define PUSH_STATE(state) yy_push_state(state, yyscanner)
63
+ #define POP_STATE yy_pop_state(yyscanner)
64
+
65
+ static int ident_length(unsigned char *ptr, int max);
66
+
67
+ static char *heredoc_delimiter;
68
+ static enum { HEREDOC_TAB_NO_STRIP, HEREDOC_TAB_STRIP } heredoc_type;
69
+ static int set_heredoc_delimiter(unsigned char *ptr, int max);
70
+
71
+ %}
72
+
73
+ %%
74
+ <INITIAL,IN_BRACE>\<\<\-? {
75
+ if (yytext[yyleng - 1] == '-')
76
+ heredoc_type = HEREDOC_TAB_STRIP;
77
+ else
78
+ heredoc_type = HEREDOC_TAB_NO_STRIP;
79
+ UPD;
80
+ PUSH_STATE(HEREDOC_DELIMITER);
81
+ report(punct);
82
+ }
83
+ <HEREDOC_DELIMITER>[^ \t\r\n].* {
84
+ int delimiter_leng;
85
+ delimiter_leng = set_heredoc_delimiter(yytext, yyleng);
86
+ if (delimiter_leng == -1) YY_FATAL_ERROR("Can't allocate memory");
87
+ yyless(delimiter_leng);
88
+ PUSH_STATE(HEREDOC);
89
+ UPD;
90
+ report(heredoc_beg);
91
+ }
92
+ <HEREDOC>^.+ {
93
+ int sleng;
94
+ sleng = 0;
95
+ if (heredoc_type == HEREDOC_TAB_STRIP) {
96
+ while (yytext[sleng] == ' ' || yytext[sleng] =='\t') {
97
+ sleng++;
98
+ if (sleng >= yyleng) { UPD; report(space); }
99
+ }
100
+ }
101
+ if (strcmp((yytext + sleng), heredoc_delimiter) == 0) { /* end-of-heredoc */
102
+ free(heredoc_delimiter);
103
+ POP_STATE;
104
+ POP_STATE;
105
+ UPD; report(heredoc_end);
106
+ }
107
+ else {
108
+ UPD; report(string);
109
+ }
110
+ }
111
+ <HEREDOC><<EOF>> {
112
+ free(heredoc_delimiter);
113
+ BEGIN(INITIAL);
114
+ UPD; report(string);
115
+ }
116
+
117
+
118
+ <INITIAL,BQUOTE,BRACE_SUBST,PAREN_SUBST,IN_BRACE>\" {
119
+ PUSH_STATE(DQUOTE);
120
+ UPD; report(punct);
121
+ }
122
+ <DQUOTE>\" {
123
+ POP_STATE;
124
+ UPD; report(punct);
125
+ }
126
+ <DQUOTE><<EOF>> { BEGIN(INITIAL); }
127
+ <DQUOTE>([^\"\`\$\\]|{escseq})+ { UPD; report(string); }
128
+
129
+
130
+ <INITIAL,DQUOTE,BRACE_SUBST,PAREN_SUBST,IN_BRACE>\` {
131
+ PUSH_STATE(BQUOTE);
132
+ UPD; report(punct);
133
+ }
134
+ <BQUOTE>\` {
135
+ POP_STATE;
136
+ UPD; report(punct);
137
+ }
138
+ <BQUOTE><<EOF>> { BEGIN(INITIAL); }
139
+
140
+
141
+ <INITIAL,DQUOTE,BQUOTE,PAREN_SUBST,IN_BRACE>\$\{ {
142
+ PUSH_STATE(BRACE_SUBST);
143
+ UPD; report(punct);
144
+ }
145
+ <BRACE_SUBST>\} {
146
+ POP_STATE;
147
+ UPD; report(punct);
148
+ }
149
+ <BRACE_SUBST>{var_ident} { UPD; report(ident); }
150
+ <BRACE_SUBST><<EOF>> { BEGIN(INITIAL); }
151
+
152
+
153
+ \{ {
154
+ PUSH_STATE(IN_BRACE);
155
+ UPD; report(punct);
156
+ }
157
+ <IN_BRACE>\} {
158
+ POP_STATE;
159
+ UPD; report(punct);
160
+ }
161
+ <IN_BRACE><<EOF>> { BEGIN(INITIAL); }
162
+
163
+
164
+ \$?\( {
165
+ PUSH_STATE(PAREN_SUBST);
166
+ UPD; report(punct);
167
+ }
168
+ <PAREN_SUBST>\) {
169
+ POP_STATE;
170
+ UPD; report(punct);
171
+ }
172
+ <PAREN_SUBST><<EOF>> { BEGIN(INITIAL); }
173
+
174
+
175
+ <INITIAL,BQUOTE,PAREN_SUBST,IN_BRACE>^\#.* { UPD; report(comment); }
176
+ <INITIAL,BQUOTE,PAREN_SUBST,IN_BRACE>{space}\#.* { UPD; report(comment); }
177
+ <INITIAL,BQUOTE,BRACE_SUBST,PAREN_SUBST,IN_BRACE>{squote} { UPD; report(string); }
178
+
179
+ {space} { UPD; report(space); }
180
+ {newline} { UPD; report(space); }
181
+ {ident}[ \t]*\([ \t]*\) { yyless(ident_length(yytext, yyleng)); UPD; report(fundef); }
182
+ {ident} { UPD; report(ident); }
183
+ \${var_ident} { UPD; report(ident); }
184
+ \\. { UPD; report(punct); }
185
+ >=|<=|!=|\;\;|\<\<\<|&&|\|\||>&|<& { UPD; report(punct); }
186
+ . { UPD; report(punct); }
187
+
188
+ %%
189
+
190
+ static void update_pos(
191
+ langscan_sh_lex_extra_t *extra,
192
+ char *text,
193
+ int leng)
194
+ {
195
+ int i, j;
196
+ extra->beg_byteno = extra->end_byteno;
197
+ extra->beg_lineno = extra->end_lineno;
198
+ extra->beg_columnno = extra->end_columnno;
199
+ j = 0;
200
+ for (i = 0; i < leng; i++) {
201
+ if (text[i] == '\n') {
202
+ extra->end_lineno++;
203
+ j = i + 1;
204
+ extra->end_columnno = 0;
205
+ }
206
+ }
207
+ extra->end_columnno += leng - j;
208
+ extra->end_byteno += leng;
209
+ }
210
+
211
/*
 * Return the length of the leading run of identifier characters in `ptr`,
 * looking at no more than `max` bytes.  Identifier characters are ASCII
 * digits, ASCII letters, '_', '-' and '.'.
 */
static int ident_length(unsigned char *ptr, int max)
{
    int count;

    for (count = 0; count < max; count++) {
        unsigned char ch = ptr[count];
        int is_digit = (ch >= '0' && ch <= '9');
        int is_upper = (ch >= 'A' && ch <= 'Z');
        int is_lower = (ch >= 'a' && ch <= 'z');
        int is_mark = (ch == '_' || ch == '-' || ch == '.');

        if (!(is_digit || is_upper || is_lower || is_mark))
            break;
    }
    return count;
}
225
+
226
+ static int set_heredoc_delimiter(unsigned char *ptr, int max)
227
+ {
228
+ char *dst, quote_char;
229
+ int in_quote, len;
230
+ heredoc_delimiter = malloc(max + 1);
231
+ if (heredoc_delimiter == NULL) return -1;
232
+ dst = heredoc_delimiter;
233
+ len = 0;
234
+ in_quote = 0;
235
+ while (len < max) {
236
+ if (in_quote == 0) { /* unquoted delimiter */
237
+ if (*ptr == '\'' || *ptr == '\"') {
238
+ quote_char = *ptr;
239
+ in_quote = 1;
240
+ ptr++;
241
+ if (++len >= max) break;
242
+ continue;
243
+ }
244
+ if (*ptr == ' ' || *ptr == '\t') break;
245
+ if (*ptr == '\\') {
246
+ ptr++;
247
+ if (++len >= max) break;
248
+ }
249
+ }
250
+ else { /* quoted delimiter */
251
+ if (*ptr == quote_char) {
252
+ in_quote = 0;
253
+ ptr++;
254
+ if (len++ >= max) break;
255
+ continue;
256
+ }
257
+ }
258
+ *dst++ = *ptr++;
259
+ len++;
260
+ }
261
+ *dst = '\0';
262
+ return len;
263
+ }
264
+
265
+ langscan_sh_tokenizer_t *langscan_sh_make_tokenizer(
266
+ size_t (*user_read)(void **user_data_p, char *buf, size_t maxlen),
267
+ void *user_data)
268
+ {
269
+ langscan_sh_tokenizer_t *tokenizer;
270
+ langscan_sh_lex_extra_t *extra;
271
+ tokenizer = (langscan_sh_tokenizer_t *)malloc(sizeof(langscan_sh_tokenizer_t));
272
+ if (tokenizer == NULL)
273
+ return NULL;
274
+ extra = (langscan_sh_lex_extra_t *)malloc(sizeof(langscan_sh_lex_extra_t));
275
+ if (extra == NULL)
276
+ return NULL;
277
+ extra->user_read = user_read;
278
+ extra->user_data = user_data;
279
+ extra->beg_lineno = 1;
280
+ extra->beg_columnno = 0;
281
+ extra->beg_byteno = 0;
282
+ extra->end_lineno = 1;
283
+ extra->end_columnno = 0;
284
+ extra->end_byteno = 0;
285
+ extra->eof = 0;
286
+ tokenizer->extra = extra;
287
+ langscan_sh_lex_lex_init(&tokenizer->scanner);
288
+ langscan_sh_lex_set_extra(extra, tokenizer->scanner);
289
+ return tokenizer;
290
+ }
291
+
292
+ langscan_sh_token_t langscan_sh_get_token(langscan_sh_tokenizer_t *tokenizer)
293
+ {
294
+ return langscan_sh_lex_lex(tokenizer->scanner);
295
+ }
296
+
297
+ void langscan_sh_free_tokenizer(langscan_sh_tokenizer_t *tokenizer)
298
+ {
299
+ langscan_sh_lex_extra_t *extra = langscan_sh_lex_get_extra(tokenizer->scanner);
300
+ free((void *)extra);
301
+ langscan_sh_lex_lex_destroy(tokenizer->scanner);
302
+ free((void *)tokenizer);
303
+ }
304
+
305
+ user_read_t langscan_sh_tokenizer_get_user_read(langscan_sh_tokenizer_t *tokenizer)
306
+ {
307
+ return tokenizer->extra->user_read;
308
+ }
309
+
310
+ void *langscan_sh_tokenizer_get_user_data(langscan_sh_tokenizer_t *tokenizer)
311
+ {
312
+ return tokenizer->extra->user_data;
313
+ }
314
+
315
+ const char *langscan_sh_token_name(langscan_sh_token_t token)
316
+ {
317
+ static char *token_names[] = {
318
+ "*eof*",
319
+ #define LANGSCAN_SH_TOKEN(name) #name,
320
+ LANGSCAN_SH_TOKEN_LIST
321
+ #undef LANGSCAN_SH_TOKEN
322
+ };
323
+
324
+ return token_names[token];
325
+ }
@@ -0,0 +1,8 @@
1
+ string
2
+ fundef
3
+ ident
4
+ punct
5
+ comment
6
+ space
7
+ heredoc_beg
8
+ heredoc_end
@@ -0,0 +1,124 @@
1
+ #
2
+ # langscan.rb - an interface module of LangScan
3
+ #
4
+ # Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
5
+ # All rights reserved.
6
+ # This is free software with ABSOLUTELY NO WARRANTY.
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the GNU General Public License version 2.
10
+ #
11
+
12
module LangScan
  VERSION = "1.2"
  LangScanRegistry = {}

  module_function

  # Require every "*.rb" file found in a _plugin_path_ directory below any
  # $LOAD_PATH entry.  Files whose name starts with "_" are skipped.
  # Entries are required in sorted order so the load order does not depend
  # on the file system.  A LoadError raised by a plugin is ignored on
  # purpose: a plugin that cannot be loaded is simply unavailable.
  def load_plugins(plugin_path)
    $LOAD_PATH.each {|path|
      candidate_path = File.join(path, plugin_path)
      next unless File.directory?(candidate_path)
      Dir.entries(candidate_path).sort.each {|entry|
        next unless File.extname(entry) == ".rb"
        next if /\A_/.match(entry)
        begin
          require(File.join(plugin_path, entry))
        rescue LoadError
          # ignore load errors
        end
      }
    }
  end

  # Load the LangScan modules (the plugins in the "langscan" directory).
  def load
    load_plugins("langscan")
  end

  # Validate that _mod_ is a LangScan module: it must respond to +name+,
  # +abbrev+ and +scan+, and its abbreviation must consist solely of
  # lowercase ASCII letters.  Raises RuntimeError otherwise.
  def validate_module(mod)
    common_methods = [:name, :abbrev, :scan]
    common_methods.each {|method|
      raise "#{mod.to_s} lacks #{method}" unless mod.respond_to?(method)
    }
    # \A and \z anchor the whole string; ^ and $ would accept any value
    # that merely contains one all-lowercase line.
    unless /\A[a-z]+\z/.match(mod.abbrev)
      raise "#{mod.to_s} invalid abbreviation: #{mod.abbrev}"
    end
  end

  # Register a new LangScan module _mod_ for each of its +extnames+.
  # Raises RuntimeError if _mod_ is invalid or if one of its extensions is
  # already registered.  All extensions are checked before any of them is
  # stored, so a failed call leaves the registry unchanged.
  def register(mod)
    validate_module(mod)
    extnames = mod.extnames
    extnames.each {|extname|
      next unless LangScanRegistry.include?(extname)
      owner = LangScanRegistry[extname]
      raise "#{extname} is already used by #{owner.abbrev}"
    }
    extnames.each {|extname|
      LangScanRegistry[extname] = mod
    }
  end

  # Return an array containing the registered LangScan modules.
  def modules
    LangScanRegistry.values.uniq
  end

  # Return the LangScan module chosen by the shebang line of _content_.
  # Return +nil+ if no suitable LangScan module is found.
  #
  # NOTE: the abbreviation only has to start at a word boundary, so an
  # abbreviation such as "c" also matches an interpreter named "csh".
  def choose_by_shebang(content)
    first_line = ""
    content.each_line {|line|
      first_line = line
      break
    }
    LangScanRegistry.each_value {|scanner|
      regexp = /\A#!.*\b#{Regexp.quote(scanner.abbrev)}/i
      return scanner if regexp.match(first_line)
    }
    return nil
  end

  # Return the LangScan module chosen by an Emacs mode line.
  # If _content_ contains "-*- mode: c -*-", +choose_by_emacs_mode+ returns
  # the module whose name is "c" (compared case-insensitively, with
  # whitespace in the name replaced by "-").  For a name containing "/"
  # such as "C/C++", each part ("C", "C++") is accepted as well.
  # Return +nil+ if no suitable LangScan module is found.
  def choose_by_emacs_mode(content)
    chunk = content[0, 512] # FIXME: magic number
    LangScanRegistry.each_value {|scanner|
      names = [scanner.name.downcase.gsub(/\s+/, "-")]
      names.concat(scanner.name.split("/")) if scanner.name.include?("/")
      mode = "(?:" + names.map {|part| Regexp.quote(part) }.join("|") + ")"
      regexp = /-\*-\s+mode:\s+#{mode}\s+-\*-/i
      return scanner if regexp.match(chunk)
    }
    return nil
  end

  # Return the LangScan module chosen by _content_.
  # +choose_by_content+ tries +choose_by_shebang+ first and then
  # +choose_by_emacs_mode+.
  def choose_by_content(content)
    return (choose_by_shebang(content) or choose_by_emacs_mode(content))
  end

  # Return the LangScan module suitable for _file_name_.  The extension is
  # looked up first; if it is unknown and _content_ is given, the content
  # is inspected instead.
  def choose(file_name, content = nil)
    extname = File.extname(file_name)
    scanner = LangScanRegistry[extname]
    scanner = choose_by_content(content) if scanner.nil? and content
    return scanner
  end

  # Return true if the extension of _file_name_ is registered.
  def support?(file_name)
    extname = File.extname(file_name)
    LangScanRegistry.include?(extname)
  end
end
123
+
124
+ LangScan.load