langscan 1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (168) hide show
  1. data/AUTHORS.txt +19 -0
  2. data/History.txt +126 -0
  3. data/Manifest.txt +167 -0
  4. data/README.rdoc +89 -0
  5. data/Rakefile +40 -0
  6. data/ext/langscan/_make_c.rb +20 -0
  7. data/ext/langscan/_make_h.rb +30 -0
  8. data/ext/langscan/_template.c +134 -0
  9. data/ext/langscan/_template.h +53 -0
  10. data/ext/langscan/c/c/Makefile +157 -0
  11. data/ext/langscan/c/c/c.c +134 -0
  12. data/ext/langscan/c/c/c.h +66 -0
  13. data/ext/langscan/c/c/ctok.c +4622 -0
  14. data/ext/langscan/c/c/ctok.l +212 -0
  15. data/ext/langscan/c/c/extconf.rb +3 -0
  16. data/ext/langscan/c/c/modulename.txt +1 -0
  17. data/ext/langscan/c/c/tokenlist.txt +13 -0
  18. data/ext/langscan/csharp/csharp/Makefile +157 -0
  19. data/ext/langscan/csharp/csharp/csharp.c +134 -0
  20. data/ext/langscan/csharp/csharp/csharp.h +65 -0
  21. data/ext/langscan/csharp/csharp/csharptok.c +2965 -0
  22. data/ext/langscan/csharp/csharp/csharptok.l +200 -0
  23. data/ext/langscan/csharp/csharp/extconf.rb +3 -0
  24. data/ext/langscan/csharp/csharp/modulename.txt +1 -0
  25. data/ext/langscan/csharp/csharp/tokenlist.txt +12 -0
  26. data/ext/langscan/d/d/Makefile +157 -0
  27. data/ext/langscan/d/d/d.c +134 -0
  28. data/ext/langscan/d/d/d.h +64 -0
  29. data/ext/langscan/d/d/dtok.c +5461 -0
  30. data/ext/langscan/d/d/dtok.l +282 -0
  31. data/ext/langscan/d/d/extconf.rb +3 -0
  32. data/ext/langscan/d/d/modulename.txt +1 -0
  33. data/ext/langscan/d/d/tokenlist.txt +11 -0
  34. data/ext/langscan/elisp/elisp/Makefile +157 -0
  35. data/ext/langscan/elisp/elisp/elisp.c +134 -0
  36. data/ext/langscan/elisp/elisp/elisp.h +62 -0
  37. data/ext/langscan/elisp/elisp/elisptok.c +2101 -0
  38. data/ext/langscan/elisp/elisp/elisptok.l +151 -0
  39. data/ext/langscan/elisp/elisp/extconf.rb +3 -0
  40. data/ext/langscan/elisp/elisp/modulename.txt +1 -0
  41. data/ext/langscan/elisp/elisp/tokenlist.txt +9 -0
  42. data/ext/langscan/java/java/Makefile +157 -0
  43. data/ext/langscan/java/java/extconf.rb +3 -0
  44. data/ext/langscan/java/java/java.c +134 -0
  45. data/ext/langscan/java/java/java.h +64 -0
  46. data/ext/langscan/java/java/javatok.c +2090 -0
  47. data/ext/langscan/java/java/javatok.l +155 -0
  48. data/ext/langscan/java/java/modulename.txt +1 -0
  49. data/ext/langscan/java/java/tokenlist.txt +11 -0
  50. data/ext/langscan/javascript/javascript/Makefile +157 -0
  51. data/ext/langscan/javascript/javascript/extconf.rb +3 -0
  52. data/ext/langscan/javascript/javascript/javascript.c +134 -0
  53. data/ext/langscan/javascript/javascript/javascript.h +63 -0
  54. data/ext/langscan/javascript/javascript/javascripttok.c +2051 -0
  55. data/ext/langscan/javascript/javascript/javascripttok.l +147 -0
  56. data/ext/langscan/javascript/javascript/modulename.txt +1 -0
  57. data/ext/langscan/javascript/javascript/tokenlist.txt +10 -0
  58. data/ext/langscan/pairmatcher/pairmatcher/Makefile +157 -0
  59. data/ext/langscan/pairmatcher/pairmatcher/extconf.rb +3 -0
  60. data/ext/langscan/pairmatcher/pairmatcher/pairmatcher.c +890 -0
  61. data/ext/langscan/php/php/Makefile +157 -0
  62. data/ext/langscan/php/php/extconf.rb +3 -0
  63. data/ext/langscan/php/php/modulename.txt +1 -0
  64. data/ext/langscan/php/php/php.c +134 -0
  65. data/ext/langscan/php/php/php.h +64 -0
  66. data/ext/langscan/php/php/phptok.c +2406 -0
  67. data/ext/langscan/php/php/phptok.l +212 -0
  68. data/ext/langscan/php/php/tokenlist.txt +11 -0
  69. data/ext/langscan/post-distclean.rb +21 -0
  70. data/ext/langscan/pre-config.rb +57 -0
  71. data/ext/langscan/python/python/Makefile +157 -0
  72. data/ext/langscan/python/python/extconf.rb +3 -0
  73. data/ext/langscan/python/python/modulename.txt +1 -0
  74. data/ext/langscan/python/python/python.c +134 -0
  75. data/ext/langscan/python/python/python.h +61 -0
  76. data/ext/langscan/python/python/pythontok.c +2102 -0
  77. data/ext/langscan/python/python/pythontok.l +155 -0
  78. data/ext/langscan/python/python/tokenlist.txt +8 -0
  79. data/ext/langscan/ruby/compat/ripper/Makefile +158 -0
  80. data/ext/langscan/ruby/compat/ripper/depend +1 -0
  81. data/ext/langscan/ruby/compat/ripper/extconf.rb +4 -0
  82. data/ext/langscan/ruby/compat/ripper/include/eventids1.c +251 -0
  83. data/ext/langscan/ruby/compat/ripper/include/eventids2.c +277 -0
  84. data/ext/langscan/ruby/compat/ripper/include/lex.c +138 -0
  85. data/ext/langscan/ruby/compat/ripper/ripper.c +14420 -0
  86. data/ext/langscan/scheme/scheme/Makefile +157 -0
  87. data/ext/langscan/scheme/scheme/extconf.rb +3 -0
  88. data/ext/langscan/scheme/scheme/modulename.txt +1 -0
  89. data/ext/langscan/scheme/scheme/scheme.c +134 -0
  90. data/ext/langscan/scheme/scheme/scheme.h +60 -0
  91. data/ext/langscan/scheme/scheme/schemetok.c +2447 -0
  92. data/ext/langscan/scheme/scheme/schemetok.l +177 -0
  93. data/ext/langscan/scheme/scheme/tokenlist.txt +7 -0
  94. data/ext/langscan/sh/sh/Makefile +157 -0
  95. data/ext/langscan/sh/sh/extconf.rb +3 -0
  96. data/ext/langscan/sh/sh/modulename.txt +1 -0
  97. data/ext/langscan/sh/sh/sh.c +134 -0
  98. data/ext/langscan/sh/sh/sh.h +61 -0
  99. data/ext/langscan/sh/sh/shtok.c +2470 -0
  100. data/ext/langscan/sh/sh/shtok.l +325 -0
  101. data/ext/langscan/sh/sh/tokenlist.txt +8 -0
  102. data/lib/langscan.rb +124 -0
  103. data/lib/langscan/_common.rb +50 -0
  104. data/lib/langscan/_easyscanner.rb +78 -0
  105. data/lib/langscan/_pairmatcher.rb +46 -0
  106. data/lib/langscan/_type.rb +125 -0
  107. data/lib/langscan/autoconf.rb +51 -0
  108. data/lib/langscan/automake.rb +51 -0
  109. data/lib/langscan/brainfuck.rb +48 -0
  110. data/lib/langscan/c.rb +144 -0
  111. data/lib/langscan/csharp.rb +101 -0
  112. data/lib/langscan/css.rb +109 -0
  113. data/lib/langscan/d.rb +201 -0
  114. data/lib/langscan/eiffel.rb +167 -0
  115. data/lib/langscan/elisp.rb +132 -0
  116. data/lib/langscan/io.rb +84 -0
  117. data/lib/langscan/java.rb +95 -0
  118. data/lib/langscan/javascript.rb +97 -0
  119. data/lib/langscan/lua.rb +116 -0
  120. data/lib/langscan/ocaml.rb +298 -0
  121. data/lib/langscan/ocaml/camlexer.ml +28 -0
  122. data/lib/langscan/ocaml/lexer.mll +230 -0
  123. data/lib/langscan/ocaml/types.ml +36 -0
  124. data/lib/langscan/perl.rb +87 -0
  125. data/lib/langscan/perl/tokenizer.pl +231 -0
  126. data/lib/langscan/php.rb +80 -0
  127. data/lib/langscan/python.rb +101 -0
  128. data/lib/langscan/rpmspec.rb +71 -0
  129. data/lib/langscan/ruby.rb +164 -0
  130. data/lib/langscan/ruby/compat/README +5 -0
  131. data/lib/langscan/ruby/compat/ripper.rb +4 -0
  132. data/lib/langscan/ruby/compat/ripper/core.rb +918 -0
  133. data/lib/langscan/ruby/compat/ripper/filter.rb +70 -0
  134. data/lib/langscan/ruby/compat/ripper/lexer.rb +179 -0
  135. data/lib/langscan/ruby/compat/ripper/sexp.rb +100 -0
  136. data/lib/langscan/scheme.rb +160 -0
  137. data/lib/langscan/sh.rb +116 -0
  138. data/lib/langscan/text.rb +37 -0
  139. data/metaconfig +2 -0
  140. data/script/console +10 -0
  141. data/script/destroy +14 -0
  142. data/script/generate +14 -0
  143. data/script/makemanifest.rb +21 -0
  144. data/setup.rb +1604 -0
  145. data/tasks/extconf.rake +13 -0
  146. data/tasks/extconf/langscan.rake +42 -0
  147. data/test/langscan/brainfuck/test/test_scan.rb +55 -0
  148. data/test/langscan/c/test/test_scan.rb +216 -0
  149. data/test/langscan/c/test/test_token.rb +41 -0
  150. data/test/langscan/csharp/test/test_scan.rb +157 -0
  151. data/test/langscan/css/test/test_css.rb +79 -0
  152. data/test/langscan/d/test/test_scan.rb +233 -0
  153. data/test/langscan/d/test/test_token.rb +205 -0
  154. data/test/langscan/eiffel/test/test_eiffel.rb +95 -0
  155. data/test/langscan/elisp/test/test_elisp.rb +177 -0
  156. data/test/langscan/io/test/test_io.rb +79 -0
  157. data/test/langscan/java/test/test_java.rb +74 -0
  158. data/test/langscan/javascript/test/test_javascript.rb +39 -0
  159. data/test/langscan/lua/test/test_lua.rb +69 -0
  160. data/test/langscan/ocaml/test/test_ocaml.rb +161 -0
  161. data/test/langscan/php/test/test_scan.rb +138 -0
  162. data/test/langscan/python/test/test_scan.rb +105 -0
  163. data/test/langscan/rpmspec/test/test_rpmspec.rb +51 -0
  164. data/test/langscan/ruby/test/test_scan.rb +71 -0
  165. data/test/langscan/scheme/test/test_scan.rb +198 -0
  166. data/test/test_helper.rb +7 -0
  167. data/test/test_langscan.rb +123 -0
  168. metadata +296 -0
@@ -0,0 +1,325 @@
1
+ /*
2
+ * shtok.l - a lex rule for shell scripts
3
+ *
4
+ * Copyright (C) 2005 Kenichi Ishibashi <bashi at dream.ie.ariake-nct.ac.jp>
5
+ * All rights reserved.
6
+ * This is free software with ABSOLUTELY NO WARRANTY.
7
+ *
8
+ * You can redistribute it and/or modify it under the terms of
9
+ * the GNU General Public License version 2.
10
+ */
11
+
12
+ %option reentrant
13
+ %option prefix="langscan_sh_lex_"
14
+ %option noyywrap
15
+ %option nodefault
16
+ %option stack
17
+ %s DQUOTE
18
+ %s BQUOTE
19
+ %s BRACE_SUBST
20
+ %s PAREN_SUBST
21
+ %s IN_BRACE
22
+ %s HEREDOC_DELIMITER
23
+ %s HEREDOC
24
+
25
+ space [ \t]+
26
+ newline \r\n|\r|\n
27
+ escseq \\({newline}|.)
28
+ ident [0-9A-Za-z_][0-9A-Za-z_\-\.]*
29
+ squote \'[^\']*\'
30
+ specialvar (\$|\#|\*|@|\?|\-|\!|\_)
31
+ var_ident ([A-Za-z_][0-9A-Za-z_]*|[0-9]|{specialvar})
32
+
33
+ %{
34
+
35
+ #include "sh.h"
36
+
37
+ #define YY_EXTRA_TYPE langscan_sh_lex_extra_t *
38
+
39
+ #if YY_NULL != 0
40
+ #error "YY_NULL is not 0."
41
+ #endif
42
+
43
+ #define YY_DECL langscan_sh_token_t langscan_sh_lex_lex(yyscan_t yyscanner)
44
+
45
+ #define YY_INPUT(buf,result,max_size) \
46
+ if (!yyextra->eof) { \
47
+ result = yyextra->user_read(&(yyextra->user_data), (buf), (max_size)); \
48
+ if (result == 0) \
49
+ yyextra->eof = 1; \
50
+ }
51
+
52
+ #define UPD update_pos(yyextra, yytext, yyleng)
53
+ static void update_pos(langscan_sh_lex_extra_t *, char *, int);
54
+
55
+ #define report(token) \
56
+ do { \
57
+ yyextra->text = yytext; \
58
+ yyextra->leng = yyleng; \
59
+ return langscan_sh_##token; \
60
+ } while (0)
61
+
62
+ #define PUSH_STATE(state) yy_push_state(state, yyscanner)
63
+ #define POP_STATE yy_pop_state(yyscanner)
64
+
65
+ static int ident_length(unsigned char *ptr, int max);
66
+
67
+ static char *heredoc_delimiter;
68
+ static enum { HEREDOC_TAB_NO_STRIP, HEREDOC_TAB_STRIP } heredoc_type;
69
+ static int set_heredoc_delimiter(unsigned char *ptr, int max);
70
+
71
+ %}
72
+
73
+ %%
74
+ <INITIAL,IN_BRACE>\<\<\-? {
75
+ if (yytext[yyleng - 1] == '-')
76
+ heredoc_type = HEREDOC_TAB_STRIP;
77
+ else
78
+ heredoc_type = HEREDOC_TAB_NO_STRIP;
79
+ UPD;
80
+ PUSH_STATE(HEREDOC_DELIMITER);
81
+ report(punct);
82
+ }
83
+ <HEREDOC_DELIMITER>[^ \t\r\n].* {
84
+ int delimiter_leng;
85
+ delimiter_leng = set_heredoc_delimiter(yytext, yyleng);
86
+ if (delimiter_leng == -1) YY_FATAL_ERROR("Can't allocate memory");
87
+ yyless(delimiter_leng);
88
+ PUSH_STATE(HEREDOC);
89
+ UPD;
90
+ report(heredoc_beg);
91
+ }
92
+ <HEREDOC>^.+ {
93
+ int sleng;
94
+ sleng = 0;
95
+ if (heredoc_type == HEREDOC_TAB_STRIP) {
96
+ while (yytext[sleng] == ' ' || yytext[sleng] =='\t') {
97
+ sleng++;
98
+ if (sleng >= yyleng) { UPD; report(space); }
99
+ }
100
+ }
101
+ if (strcmp((yytext + sleng), heredoc_delimiter) == 0) { /* end-of-heredoc */
102
+ free(heredoc_delimiter);
103
+ POP_STATE;
104
+ POP_STATE;
105
+ UPD; report(heredoc_end);
106
+ }
107
+ else {
108
+ UPD; report(string);
109
+ }
110
+ }
111
+ <HEREDOC><<EOF>> {
112
+ free(heredoc_delimiter);
113
+ BEGIN(INITIAL);
114
+ UPD; report(string);
115
+ }
116
+
117
+
118
+ <INITIAL,BQUOTE,BRACE_SUBST,PAREN_SUBST,IN_BRACE>\" {
119
+ PUSH_STATE(DQUOTE);
120
+ UPD; report(punct);
121
+ }
122
+ <DQUOTE>\" {
123
+ POP_STATE;
124
+ UPD; report(punct);
125
+ }
126
+ <DQUOTE><<EOF>> { BEGIN(INITIAL); }
127
+ <DQUOTE>([^\"\`\$\\]|{escseq})+ { UPD; report(string); }
128
+
129
+
130
+ <INITIAL,DQUOTE,BRACE_SUBST,PAREN_SUBST,IN_BRACE>\` {
131
+ PUSH_STATE(BQUOTE);
132
+ UPD; report(punct);
133
+ }
134
+ <BQUOTE>\` {
135
+ POP_STATE;
136
+ UPD; report(punct);
137
+ }
138
+ <BQUOTE><<EOF>> { BEGIN(INITIAL); }
139
+
140
+
141
+ <INITIAL,DQUOTE,BQUOTE,PAREN_SUBST,IN_BRACE>\$\{ {
142
+ PUSH_STATE(BRACE_SUBST);
143
+ UPD; report(punct);
144
+ }
145
+ <BRACE_SUBST>\} {
146
+ POP_STATE;
147
+ UPD; report(punct);
148
+ }
149
+ <BRACE_SUBST>{var_ident} { UPD; report(ident); }
150
+ <BRACE_SUBST><<EOF>> { BEGIN(INITIAL); }
151
+
152
+
153
+ \{ {
154
+ PUSH_STATE(IN_BRACE);
155
+ UPD; report(punct);
156
+ }
157
+ <IN_BRACE>\} {
158
+ POP_STATE;
159
+ UPD; report(punct);
160
+ }
161
+ <IN_BRACE><<EOF>> { BEGIN(INITIAL); }
162
+
163
+
164
+ \$?\( {
165
+ PUSH_STATE(PAREN_SUBST);
166
+ UPD; report(punct);
167
+ }
168
+ <PAREN_SUBST>\) {
169
+ POP_STATE;
170
+ UPD; report(punct);
171
+ }
172
+ <PAREN_SUBST><<EOF>> { BEGIN(INITIAL); }
173
+
174
+
175
+ <INITIAL,BQUOTE,PAREN_SUBST,IN_BRACE>^\#.* { UPD; report(comment); }
176
+ <INITIAL,BQUOTE,PAREN_SUBST,IN_BRACE>{space}\#.* { UPD; report(comment); }
177
+ <INITIAL,BQUOTE,BRACE_SUBST,PAREN_SUBST,IN_BRACE>{squote} { UPD; report(string); }
178
+
179
+ {space} { UPD; report(space); }
180
+ {newline} { UPD; report(space); }
181
+ {ident}[ \t]*\([ \t]*\) { yyless(ident_length(yytext, yyleng)); UPD; report(fundef); }
182
+ {ident} { UPD; report(ident); }
183
+ \${var_ident} { UPD; report(ident); }
184
+ \\. { UPD; report(punct); }
185
+ >=|<=|!=|\;\;|\<\<\<|&&|\|\||>&|<& { UPD; report(punct); }
186
+ . { UPD; report(punct); }
187
+
188
+ %%
189
+
190
+ static void update_pos(
191
+ langscan_sh_lex_extra_t *extra,
192
+ char *text,
193
+ int leng)
194
+ {
195
+ int i, j;
196
+ extra->beg_byteno = extra->end_byteno;
197
+ extra->beg_lineno = extra->end_lineno;
198
+ extra->beg_columnno = extra->end_columnno;
199
+ j = 0;
200
+ for (i = 0; i < leng; i++) {
201
+ if (text[i] == '\n') {
202
+ extra->end_lineno++;
203
+ j = i + 1;
204
+ extra->end_columnno = 0;
205
+ }
206
+ }
207
+ extra->end_columnno += leng - j;
208
+ extra->end_byteno += leng;
209
+ }
210
+
211
/*
 * Return the length of the leading run of identifier characters
 * ([0-9A-Za-z_.-]) in the first `max` bytes at `ptr`.
 * Returns 0 when `max` is not positive or the first byte does not qualify.
 */
static int ident_length(unsigned char *ptr, int max)
{
    int len;

    for (len = 0; len < max; len++) {
        unsigned char ch = ptr[len];
        int is_digit = ('0' <= ch && ch <= '9');
        int is_upper = ('A' <= ch && ch <= 'Z');
        int is_lower = ('a' <= ch && ch <= 'z');
        int is_extra = (ch == '_' || ch == '-' || ch == '.');

        if (!(is_digit || is_upper || is_lower || is_extra))
            break;
    }
    return len;
}
225
+
226
+ static int set_heredoc_delimiter(unsigned char *ptr, int max)
227
+ {
228
+ char *dst, quote_char;
229
+ int in_quote, len;
230
+ heredoc_delimiter = malloc(max + 1);
231
+ if (heredoc_delimiter == NULL) return -1;
232
+ dst = heredoc_delimiter;
233
+ len = 0;
234
+ in_quote = 0;
235
+ while (len < max) {
236
+ if (in_quote == 0) { /* unquoted delimiter */
237
+ if (*ptr == '\'' || *ptr == '\"') {
238
+ quote_char = *ptr;
239
+ in_quote = 1;
240
+ ptr++;
241
+ if (++len >= max) break;
242
+ continue;
243
+ }
244
+ if (*ptr == ' ' || *ptr == '\t') break;
245
+ if (*ptr == '\\') {
246
+ ptr++;
247
+ if (++len >= max) break;
248
+ }
249
+ }
250
+ else { /* quoted delimiter */
251
+ if (*ptr == quote_char) {
252
+ in_quote = 0;
253
+ ptr++;
254
+ if (len++ >= max) break;
255
+ continue;
256
+ }
257
+ }
258
+ *dst++ = *ptr++;
259
+ len++;
260
+ }
261
+ *dst = '\0';
262
+ return len;
263
+ }
264
+
265
+ langscan_sh_tokenizer_t *langscan_sh_make_tokenizer(
266
+ size_t (*user_read)(void **user_data_p, char *buf, size_t maxlen),
267
+ void *user_data)
268
+ {
269
+ langscan_sh_tokenizer_t *tokenizer;
270
+ langscan_sh_lex_extra_t *extra;
271
+ tokenizer = (langscan_sh_tokenizer_t *)malloc(sizeof(langscan_sh_tokenizer_t));
272
+ if (tokenizer == NULL)
273
+ return NULL;
274
+ extra = (langscan_sh_lex_extra_t *)malloc(sizeof(langscan_sh_lex_extra_t));
275
+ if (extra == NULL)
276
+ return NULL;
277
+ extra->user_read = user_read;
278
+ extra->user_data = user_data;
279
+ extra->beg_lineno = 1;
280
+ extra->beg_columnno = 0;
281
+ extra->beg_byteno = 0;
282
+ extra->end_lineno = 1;
283
+ extra->end_columnno = 0;
284
+ extra->end_byteno = 0;
285
+ extra->eof = 0;
286
+ tokenizer->extra = extra;
287
+ langscan_sh_lex_lex_init(&tokenizer->scanner);
288
+ langscan_sh_lex_set_extra(extra, tokenizer->scanner);
289
+ return tokenizer;
290
+ }
291
+
292
+ langscan_sh_token_t langscan_sh_get_token(langscan_sh_tokenizer_t *tokenizer)
293
+ {
294
+ return langscan_sh_lex_lex(tokenizer->scanner);
295
+ }
296
+
297
+ void langscan_sh_free_tokenizer(langscan_sh_tokenizer_t *tokenizer)
298
+ {
299
+ langscan_sh_lex_extra_t *extra = langscan_sh_lex_get_extra(tokenizer->scanner);
300
+ free((void *)extra);
301
+ langscan_sh_lex_lex_destroy(tokenizer->scanner);
302
+ free((void *)tokenizer);
303
+ }
304
+
305
+ user_read_t langscan_sh_tokenizer_get_user_read(langscan_sh_tokenizer_t *tokenizer)
306
+ {
307
+ return tokenizer->extra->user_read;
308
+ }
309
+
310
+ void *langscan_sh_tokenizer_get_user_data(langscan_sh_tokenizer_t *tokenizer)
311
+ {
312
+ return tokenizer->extra->user_data;
313
+ }
314
+
315
+ const char *langscan_sh_token_name(langscan_sh_token_t token)
316
+ {
317
+ static char *token_names[] = {
318
+ "*eof*",
319
+ #define LANGSCAN_SH_TOKEN(name) #name,
320
+ LANGSCAN_SH_TOKEN_LIST
321
+ #undef LANGSCAN_SH_TOKEN
322
+ };
323
+
324
+ return token_names[token];
325
+ }
@@ -0,0 +1,8 @@
1
+ string
2
+ fundef
3
+ ident
4
+ punct
5
+ comment
6
+ space
7
+ heredoc_beg
8
+ heredoc_end
data/lib/langscan.rb ADDED
@@ -0,0 +1,124 @@
1
+ #
2
+ # langscan.rb - an interface module of LangScan
3
+ #
4
+ # Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
5
+ # All rights reserved.
6
+ # This is free software with ABSOLUTELY NO WARRANTY.
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the GNU General Public License version 2.
10
+ #
11
+
12
module LangScan
  VERSION = "1.2"

  # Maps a file extension as returned by File.extname (e.g. ".rb") to the
  # scanner module registered for it.
  LangScanRegistry = {}

  # Methods every scanner module must respond to.
  REQUIRED_METHODS = [:name, :abbrev, :scan].freeze

  # An abbreviation must consist of lowercase ASCII letters only.  \A and \z
  # anchor the whole string; ^ and $ would accept "ok\n<anything>".
  ABBREV_PATTERN = /\A[a-z]+\z/

  # Number of leading characters of a file searched for an Emacs mode line.
  EMACS_MODE_SCAN_LENGTH = 512

  module_function

  # Require every *.rb file found in the _plugin_path_ subdirectory of each
  # $LOAD_PATH entry.  Files whose name starts with "_" are skipped.
  # Entries are required in sorted order so that the registration order does
  # not depend on the order in which the filesystem lists them.
  def load_plugins(plugin_path)
    $LOAD_PATH.each do |path|
      candidate_path = File.join(path, plugin_path)
      next unless File.directory?(candidate_path)

      Dir.entries(candidate_path).sort.each do |entry|
        next unless File.extname(entry) == ".rb"
        next if entry.start_with?("_")

        begin
          require(File.join(plugin_path, entry))
        rescue LoadError
          # Deliberately best effort: a plugin that cannot be loaded is
          # skipped so the remaining plugins still get loaded.
        end
      end
    end
  end

  # Load the LangScan modules found under "langscan".
  def load
    load_plugins("langscan")
  end

  # Validate that _mod_ is a LangScan module.
  # Raises RuntimeError when _mod_ lacks one of the required methods or when
  # its abbreviation contains anything but lowercase ASCII letters.
  def validate_module(mod)
    REQUIRED_METHODS.each do |method|
      raise "#{mod.to_s} lacks #{method}" unless mod.respond_to?(method)
    end
    unless ABBREV_PATTERN.match(mod.abbrev)
      raise "#{mod.to_s} invalid abbreviation: #{mod.abbrev}"
    end
  end

  # Register a new LangScan module _mod_ for each of its extnames.
  # Raises RuntimeError when _mod_ is not a valid module, does not provide
  # extnames, or claims an extension that is already registered.  All
  # extensions are checked before any is stored, so a failed call leaves the
  # registry untouched.
  def register(mod)
    validate_module(mod)
    raise "#{mod.to_s} lacks extnames" unless mod.respond_to?(:extnames)

    extnames = mod.extnames
    extnames.each do |extname|
      owner = LangScanRegistry[extname]
      raise "#{extname} is already used by #{owner.abbrev}" if owner
    end
    extnames.each { |extname| LangScanRegistry[extname] = mod }
  end

  # Return an array containing the registered LangScan modules.
  def modules
    LangScanRegistry.values.uniq
  end

  # Return the LangScan module chosen by the shebang line of _content_.
  # A module matches when its abbreviation starts a word on the first line
  # and is not directly followed by another letter, so "ruby1.8" selects
  # the "ruby" module while "csh" does not select the "c" module.
  # Return +nil+ if no suitable LangScan module is found.
  def choose_by_shebang(content)
    first_line = content.each_line.first.to_s
    return nil unless first_line.start_with?("#!")

    LangScanRegistry.each_value do |scanner|
      abbrev = Regexp.quote(scanner.abbrev)
      return scanner if /\A#!.*\b#{abbrev}(?![a-z])/i.match(first_line)
    end
    nil
  end

  # Return the LangScan module chosen by the Emacs mode line of _content_.
  # If _content_ contains "-*- mode: c -*-", +choose_by_emacs_mode+ returns
  # LangScan::C.  Only the first EMACS_MODE_SCAN_LENGTH characters are
  # searched.  A module whose name contains "/" ("C/C++" etc.) also matches
  # each of the parts on its own.
  # Return +nil+ if no suitable LangScan module is found.
  def choose_by_emacs_mode(content)
    chunk = content[0, EMACS_MODE_SCAN_LENGTH]
    LangScanRegistry.each_value do |scanner|
      candidates = [scanner.name.downcase.gsub(/\s+/, "-")]
      candidates.concat(scanner.name.split("/")) if scanner.name.include?("/")
      mode = candidates.map { |candidate| Regexp.quote(candidate) }.join("|")
      return scanner if /-\*-\s+mode:\s+(?:#{mode})\s+-\*-/i.match(chunk)
    end
    nil
  end

  # Return the LangScan module chosen by _content_.
  # +choose_by_content+ tries +choose_by_shebang+ first and falls back to
  # +choose_by_emacs_mode+.
  def choose_by_content(content)
    choose_by_shebang(content) || choose_by_emacs_mode(content)
  end

  # Return the LangScan module suitable for _file_name_.
  # The extension is looked up first; _content_, when given, is consulted
  # only if the extension is not registered.
  def choose(file_name, content = nil)
    scanner = LangScanRegistry[File.extname(file_name)]
    scanner = choose_by_content(content) if scanner.nil? && content
    scanner
  end

  # Return true if the extension of _file_name_ is supported by LangScan.
  def support?(file_name)
    LangScanRegistry.include?(File.extname(file_name))
  end
end
123
+
124
+ LangScan.load