langscan 1.2-x86-mswin32-60

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
Files changed (180)
  1. data/AUTHORS.txt +19 -0
  2. data/History.txt +126 -0
  3. data/Manifest.txt +167 -0
  4. data/README.rdoc +91 -0
  5. data/Rakefile +40 -0
  6. data/ext/langscan/_make_c.rb +20 -0
  7. data/ext/langscan/_make_h.rb +30 -0
  8. data/ext/langscan/_template.c +134 -0
  9. data/ext/langscan/_template.h +53 -0
  10. data/ext/langscan/c/c/Makefile +188 -0
  11. data/ext/langscan/c/c/c.c +134 -0
  12. data/ext/langscan/c/c/c.h +66 -0
  13. data/ext/langscan/c/c/ctok.c +4629 -0
  14. data/ext/langscan/c/c/ctok.l +212 -0
  15. data/ext/langscan/c/c/extconf.rb +3 -0
  16. data/ext/langscan/c/c/modulename.txt +1 -0
  17. data/ext/langscan/c/c/tokenlist.txt +13 -0
  18. data/ext/langscan/csharp/csharp/Makefile +188 -0
  19. data/ext/langscan/csharp/csharp/csharp.c +134 -0
  20. data/ext/langscan/csharp/csharp/csharp.h +65 -0
  21. data/ext/langscan/csharp/csharp/csharptok.c +2971 -0
  22. data/ext/langscan/csharp/csharp/csharptok.l +200 -0
  23. data/ext/langscan/csharp/csharp/extconf.rb +3 -0
  24. data/ext/langscan/csharp/csharp/modulename.txt +1 -0
  25. data/ext/langscan/csharp/csharp/tokenlist.txt +12 -0
  26. data/ext/langscan/d/d/Makefile +188 -0
  27. data/ext/langscan/d/d/d.c +134 -0
  28. data/ext/langscan/d/d/d.h +64 -0
  29. data/ext/langscan/d/d/dtok.c +5468 -0
  30. data/ext/langscan/d/d/dtok.l +282 -0
  31. data/ext/langscan/d/d/extconf.rb +3 -0
  32. data/ext/langscan/d/d/modulename.txt +1 -0
  33. data/ext/langscan/d/d/tokenlist.txt +11 -0
  34. data/ext/langscan/elisp/elisp/Makefile +188 -0
  35. data/ext/langscan/elisp/elisp/elisp.c +134 -0
  36. data/ext/langscan/elisp/elisp/elisp.h +62 -0
  37. data/ext/langscan/elisp/elisp/elisptok.c +2108 -0
  38. data/ext/langscan/elisp/elisp/elisptok.l +151 -0
  39. data/ext/langscan/elisp/elisp/extconf.rb +3 -0
  40. data/ext/langscan/elisp/elisp/modulename.txt +1 -0
  41. data/ext/langscan/elisp/elisp/tokenlist.txt +9 -0
  42. data/ext/langscan/java/java/Makefile +188 -0
  43. data/ext/langscan/java/java/extconf.rb +3 -0
  44. data/ext/langscan/java/java/java.c +134 -0
  45. data/ext/langscan/java/java/java.h +64 -0
  46. data/ext/langscan/java/java/javatok.c +2097 -0
  47. data/ext/langscan/java/java/javatok.l +155 -0
  48. data/ext/langscan/java/java/modulename.txt +1 -0
  49. data/ext/langscan/java/java/tokenlist.txt +11 -0
  50. data/ext/langscan/javascript/javascript/Makefile +188 -0
  51. data/ext/langscan/javascript/javascript/extconf.rb +3 -0
  52. data/ext/langscan/javascript/javascript/javascript.c +134 -0
  53. data/ext/langscan/javascript/javascript/javascript.h +63 -0
  54. data/ext/langscan/javascript/javascript/javascripttok.c +2058 -0
  55. data/ext/langscan/javascript/javascript/javascripttok.l +147 -0
  56. data/ext/langscan/javascript/javascript/modulename.txt +1 -0
  57. data/ext/langscan/javascript/javascript/tokenlist.txt +10 -0
  58. data/ext/langscan/pairmatcher/pairmatcher/Makefile +188 -0
  59. data/ext/langscan/pairmatcher/pairmatcher/extconf.rb +3 -0
  60. data/ext/langscan/pairmatcher/pairmatcher/pairmatcher.c +890 -0
  61. data/ext/langscan/php/php/Makefile +188 -0
  62. data/ext/langscan/php/php/extconf.rb +3 -0
  63. data/ext/langscan/php/php/modulename.txt +1 -0
  64. data/ext/langscan/php/php/php.c +134 -0
  65. data/ext/langscan/php/php/php.h +64 -0
  66. data/ext/langscan/php/php/phptok.c +2413 -0
  67. data/ext/langscan/php/php/phptok.l +212 -0
  68. data/ext/langscan/php/php/tokenlist.txt +11 -0
  69. data/ext/langscan/post-distclean.rb +21 -0
  70. data/ext/langscan/pre-config.rb +57 -0
  71. data/ext/langscan/python/python/Makefile +188 -0
  72. data/ext/langscan/python/python/extconf.rb +3 -0
  73. data/ext/langscan/python/python/modulename.txt +1 -0
  74. data/ext/langscan/python/python/python.c +134 -0
  75. data/ext/langscan/python/python/python.h +61 -0
  76. data/ext/langscan/python/python/pythontok.c +2109 -0
  77. data/ext/langscan/python/python/pythontok.l +155 -0
  78. data/ext/langscan/python/python/tokenlist.txt +8 -0
  79. data/ext/langscan/ruby/compat/ripper/Makefile +189 -0
  80. data/ext/langscan/ruby/compat/ripper/depend +1 -0
  81. data/ext/langscan/ruby/compat/ripper/extconf.rb +4 -0
  82. data/ext/langscan/ruby/compat/ripper/include/eventids1.c +251 -0
  83. data/ext/langscan/ruby/compat/ripper/include/eventids2.c +277 -0
  84. data/ext/langscan/ruby/compat/ripper/include/lex.c +138 -0
  85. data/ext/langscan/ruby/compat/ripper/ripper.c +14420 -0
  86. data/ext/langscan/scheme/scheme/Makefile +188 -0
  87. data/ext/langscan/scheme/scheme/extconf.rb +3 -0
  88. data/ext/langscan/scheme/scheme/modulename.txt +1 -0
  89. data/ext/langscan/scheme/scheme/scheme.c +134 -0
  90. data/ext/langscan/scheme/scheme/scheme.h +60 -0
  91. data/ext/langscan/scheme/scheme/schemetok.c +2454 -0
  92. data/ext/langscan/scheme/scheme/schemetok.l +177 -0
  93. data/ext/langscan/scheme/scheme/tokenlist.txt +7 -0
  94. data/ext/langscan/sh/sh/Makefile +188 -0
  95. data/ext/langscan/sh/sh/extconf.rb +3 -0
  96. data/ext/langscan/sh/sh/modulename.txt +1 -0
  97. data/ext/langscan/sh/sh/sh.c +134 -0
  98. data/ext/langscan/sh/sh/sh.h +61 -0
  99. data/ext/langscan/sh/sh/shtok.c +2477 -0
  100. data/ext/langscan/sh/sh/shtok.l +325 -0
  101. data/ext/langscan/sh/sh/tokenlist.txt +8 -0
  102. data/lib/langscan.rb +124 -0
  103. data/lib/langscan/_common.rb +50 -0
  104. data/lib/langscan/_easyscanner.rb +78 -0
  105. data/lib/langscan/_pairmatcher.rb +46 -0
  106. data/lib/langscan/_type.rb +125 -0
  107. data/lib/langscan/autoconf.rb +51 -0
  108. data/lib/langscan/automake.rb +51 -0
  109. data/lib/langscan/brainfuck.rb +48 -0
  110. data/lib/langscan/c.rb +144 -0
  111. data/lib/langscan/c/c.so +0 -0
  112. data/lib/langscan/csharp.rb +101 -0
  113. data/lib/langscan/csharp/csharp.so +0 -0
  114. data/lib/langscan/css.rb +109 -0
  115. data/lib/langscan/d.rb +201 -0
  116. data/lib/langscan/d/d.so +0 -0
  117. data/lib/langscan/eiffel.rb +167 -0
  118. data/lib/langscan/elisp.rb +132 -0
  119. data/lib/langscan/elisp/elisp.so +0 -0
  120. data/lib/langscan/io.rb +84 -0
  121. data/lib/langscan/java.rb +95 -0
  122. data/lib/langscan/java/java.so +0 -0
  123. data/lib/langscan/javascript.rb +97 -0
  124. data/lib/langscan/javascript/javascript.so +0 -0
  125. data/lib/langscan/lua.rb +116 -0
  126. data/lib/langscan/ocaml.rb +298 -0
  127. data/lib/langscan/ocaml/camlexer.ml +28 -0
  128. data/lib/langscan/ocaml/lexer.mll +230 -0
  129. data/lib/langscan/ocaml/types.ml +36 -0
  130. data/lib/langscan/pairmatcher/pairmatcher.so +0 -0
  131. data/lib/langscan/perl.rb +87 -0
  132. data/lib/langscan/perl/tokenizer.pl +231 -0
  133. data/lib/langscan/php.rb +80 -0
  134. data/lib/langscan/php/php.so +0 -0
  135. data/lib/langscan/python.rb +101 -0
  136. data/lib/langscan/python/python.so +0 -0
  137. data/lib/langscan/rpmspec.rb +71 -0
  138. data/lib/langscan/ruby.rb +164 -0
  139. data/lib/langscan/ruby/compat/README +5 -0
  140. data/lib/langscan/ruby/compat/ripper.rb +4 -0
  141. data/lib/langscan/ruby/compat/ripper.so +0 -0
  142. data/lib/langscan/ruby/compat/ripper/core.rb +918 -0
  143. data/lib/langscan/ruby/compat/ripper/filter.rb +70 -0
  144. data/lib/langscan/ruby/compat/ripper/lexer.rb +179 -0
  145. data/lib/langscan/ruby/compat/ripper/sexp.rb +100 -0
  146. data/lib/langscan/scheme.rb +160 -0
  147. data/lib/langscan/scheme/scheme.so +0 -0
  148. data/lib/langscan/sh.rb +116 -0
  149. data/lib/langscan/sh/sh.so +0 -0
  150. data/lib/langscan/text.rb +37 -0
  151. data/metaconfig +2 -0
  152. data/script/console +10 -0
  153. data/script/destroy +14 -0
  154. data/script/generate +14 -0
  155. data/script/makemanifest.rb +21 -0
  156. data/setup.rb +1604 -0
  157. data/tasks/extconf.rake +13 -0
  158. data/tasks/extconf/langscan.rake +42 -0
  159. data/test/langscan/brainfuck/test/test_scan.rb +55 -0
  160. data/test/langscan/c/test/test_scan.rb +216 -0
  161. data/test/langscan/c/test/test_token.rb +41 -0
  162. data/test/langscan/csharp/test/test_scan.rb +157 -0
  163. data/test/langscan/css/test/test_css.rb +79 -0
  164. data/test/langscan/d/test/test_scan.rb +233 -0
  165. data/test/langscan/d/test/test_token.rb +205 -0
  166. data/test/langscan/eiffel/test/test_eiffel.rb +95 -0
  167. data/test/langscan/elisp/test/test_elisp.rb +177 -0
  168. data/test/langscan/io/test/test_io.rb +79 -0
  169. data/test/langscan/java/test/test_java.rb +74 -0
  170. data/test/langscan/javascript/test/test_javascript.rb +39 -0
  171. data/test/langscan/lua/test/test_lua.rb +69 -0
  172. data/test/langscan/ocaml/test/test_ocaml.rb +161 -0
  173. data/test/langscan/php/test/test_scan.rb +138 -0
  174. data/test/langscan/python/test/test_scan.rb +105 -0
  175. data/test/langscan/rpmspec/test/test_rpmspec.rb +51 -0
  176. data/test/langscan/ruby/test/test_scan.rb +71 -0
  177. data/test/langscan/scheme/test/test_scan.rb +198 -0
  178. data/test/test_helper.rb +7 -0
  179. data/test/test_langscan.rb +123 -0
  180. metadata +320 -0
data/lib/langscan/ocaml/camlexer.ml
@@ -0,0 +1,28 @@
+ (*
+   camlexer - Lexical Analyzer for Gonzui ocamlsupport
+
+   Copyright (C) 2005 Soutaro Matsumoto <matsumoto@soutaro.com>
+   All rights reserved.
+   This is free software with ABSOLUTELY NO WARRANTY.
+
+   You can redistribute it and/or modify it under the terms of
+   the GNU General Public License version 2.
+ *)
+
+ (* $Id: camlexer.ml,v 1.1.1.1 2005/09/15 19:38:39 bashi Exp $ *)
+
+ let main () =
+   try
+     let lexbuf = Lexing.from_channel stdin in
+     while true do
+       let ((lnum,bnum),tname,lexed_str) = (Lexer.token lexbuf) in
+       begin
+         Printf.printf "%d:%d:%s:%s\n" lnum bnum (Types.to_string tname) lexed_str;
+         flush stdout;
+       end
+     done
+   with
+     Lexer.EOF -> exit 0
+
+ let _ = main ()
+
data/lib/langscan/ocaml/lexer.mll
@@ -0,0 +1,230 @@
+ (*
+   camlexer - Lexical Analyzer for Gonzui ocamlsupport
+
+   Copyright (C) 2005 Soutaro Matsumoto <matsumoto@soutaro.com>
+   All rights reserved.
+   This is free software with ABSOLUTELY NO WARRANTY.
+
+   You can redistribute it and/or modify it under the terms of
+   the GNU General Public License version 2.
+
+ *)
+
+ (* $Id: lexer.mll,v 1.1.1.1 2005/09/15 19:38:39 bashi Exp $ *)
+
+ {
+   exception EOF
+
+   open Types
+
+   let lnum = ref 1
+   let inc_lnum () =
+     begin
+       lnum := !lnum+1;
+     end
+
+   let reset () =
+     lnum := 1
+
+   let get_pos lexbuf =
+     let pos = Lexing.lexeme_start_p lexbuf in
+     let boff = pos.Lexing.pos_bol in
+     let cnum = pos.Lexing.pos_cnum in
+     !lnum,boff+cnum
+
+   let str_lexbuf = ref (None: (Lexing.lexbuf) option)
+
+ }
+
+ let newline = ('\n' | '\r' | "\r\n")
+ let blank = [' ' '\t']
+ let letter = ['a'-'z' 'A'-'Z']
+ let num = ['0'-'9']
+ let ident = (letter | '_') (letter | num | '_' | '\'')*
+ let int_lit =
+     (('-')? num (num | '_')*)
+   | (('-')? ("0x"|"0X") (num | ['A'-'F'] ['a'-'f']) (num | ['A'-'F'] ['a'-'f'] | '_')*)
+   | (('-')? ("0o"|"0O") (['0'-'7']) (['0'-'7'] | '_')*)
+   | (('-')? ("0b"|"0B") (['0'-'1']) (['0'-'1'] | '_')*)
+ let float_lit =
+   ('-')? num (num | '_')* ('.' (num | '_')*)? (("e"|"E") ('+'|'-')? num (num | '_')*)?
+ let regular_char = [^ '\'']
+ let escape_sequence =
+     '\\' ['\\' '\"' '\'' 'n' 't' 'b' 'r']
+   | '\\' num num num
+   | "\\x" (num | ['A'-'F'] | ['a'-'f']) (num | ['A'-'F'] | ['a'-'f'])
+ let char_lit =
+     '\'' regular_char '\''
+   | '\'' escape_sequence '\''
+ let label = ['a'-'z'] (letter | num | '_' | '\'')*
+ let operator_char = ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~']
+ let infix_symbol = ['=' '<' '>' '@' '|' '&' '+' '-' '*' '/' '$' '%'] operator_char*
+ let prefix_symbol = ['!' '?' '~'] operator_char*
+ let keywords =
+     "and" | "as" | "assert" | "asr" | "begin" | "class"
+   | "constraint" | "do" | "done" | "downto" | "else" | "end"
+   | "exception" | "external" | "false" | "for" | "fun" | "function"
+   | "functor" | "if" | "in" | "include" | "inherit" | "initializer"
+   | "land" | "lazy" | "let" | "lor" | "lsl" | "lsr"
+   | "lxor" | "match" | "method" | "mod" | "module" | "mutable"
+   | "new" | "object" | "of" | "open" | "or" | "private"
+   | "rec" | "sig" | "struct" | "then" | "to" | "true"
+   | "try" | "type" | "val" | "virtual" | "when" | "while" | "with"
+ let puncts =
+     "!=" | "#" | "&" | "&&" | "\'" | "(" | ")" | "*" | "+" | "," | "-"
+   | "-." | "->" | "." | ".." | ":" | "::" | ":=" | ":>" | ";" | ";;" | "<"
+   | "<-" | "=" | ">" | ">]" | ">}" | "?" | "??" | "[" | "[<" | "[>" | "[|"
+   | "]" | "_" | "`" | "{" | "{<" | "|" | "|]" | "}" | "~"
+ let camlp4_keywords = "parser"
+ let camlp4_puncts =
+   "<<" | "<:" | ">>" | "$" | "$$" | "$:"
+ let ocamlyacc_keywords =
+   "%token" | "%start" | "%type" | "%left" | "%right" | "%nonassoc" | "%prec"
+ let ocamlyacc_puncts =
+   "%{" | "%}" | "%%"
+ let ocamlyacc_ident = "$" num+
+ let linenum_directive = '#' ' ' num+
+   | '#' ' ' num+ ' ' '\"' [^ '\"']* '\"'
+ let built_in_constants = "false" | "true" | "()" | "[]"
+
+ rule token = parse
+   | newline
+     {
+       begin
+         inc_lnum();
+         token lexbuf;
+       end
+     }
+   | blank +
+     { token lexbuf }
+   | linenum_directive {
+       (get_pos lexbuf, Ttext, Lexing.lexeme lexbuf);
+     }
+   | keywords | camlp4_keywords | ocamlyacc_keywords {
+       (get_pos lexbuf, Tkeyword, Lexing.lexeme lexbuf)
+     }
+   | built_in_constants {
+       (get_pos lexbuf, Tkeyword, Lexing.lexeme lexbuf)
+     }
+   | "/*" {
+       let pos = get_pos lexbuf in
+       (pos, Tcomment, ocamlyacc_comment 0 "/*" lexbuf);
+     }
+   | "(*" {
+       let pos = get_pos lexbuf in
+       (pos, Tcomment, comment 0 "(*" lexbuf)
+     }
+   | '\"' {
+       let pos = get_pos lexbuf in
+       (pos, Tstring, string "\"" lexbuf) }
+   | puncts | camlp4_puncts | ocamlyacc_puncts {
+       (get_pos lexbuf, Tpunct, Lexing.lexeme lexbuf)
+     }
+   | infix_symbol | prefix_symbol {
+       (get_pos lexbuf, Tident, Lexing.lexeme lexbuf)
+     }
+   | ('~'|'?') label ':' {
+       let s = Lexing.lexeme lexbuf in
+       let name = String.sub s 1 (String.length s - 2) in
+       (get_pos lexbuf, Tident, s)
+     }
+   | ident | ocamlyacc_ident {
+       (get_pos lexbuf, Tident, Lexing.lexeme lexbuf)
+     }
+   | char_lit {
+       (get_pos lexbuf, Tchar, Lexing.lexeme lexbuf)
+     }
+   | int_lit {
+       (get_pos lexbuf, Tint, Lexing.lexeme lexbuf)
+     }
+   | float_lit {
+       (get_pos lexbuf, Tfloat, Lexing.lexeme lexbuf)
+     }
+   | eof { raise EOF }
+   | _
+     { token lexbuf }
+
+ and comment lv acc = parse
+   | newline {
+       begin
+         inc_lnum();
+         comment lv (acc ^ "\\o") lexbuf;
+       end
+     }
+   | "(*" {
+       comment (lv+1) (acc ^ Lexing.lexeme lexbuf) lexbuf
+     }
+   | "*)" {
+       if lv = 0
+       then
+         acc ^ "*)"
+       else
+         comment (lv-1) (acc ^ Lexing.lexeme lexbuf) lexbuf
+     }
+   | ([^ '\\'] as c1) "\"" {
+       let s = string "\"" lexbuf in
+       match !str_lexbuf with
+         Some lexbuf -> comment lv (acc ^ Printf.sprintf "%c" c1 ^ s) lexbuf
+     }
+   | char_lit {
+       comment lv (acc ^ Lexing.lexeme lexbuf) lexbuf
+     }
+   | _ {
+       let s = Lexing.lexeme lexbuf in
+       comment lv (acc^s) lexbuf
+     }
+
+ and string acc = parse
+   | newline {
+       begin
+         inc_lnum();
+         string (acc ^ "\\o") lexbuf;
+       end
+     }
+   | '\"' {
+       begin
+         str_lexbuf := Some lexbuf;
+         acc ^ "\"";
+       end
+     }
+   | escape_sequence {
+       string (acc ^ Lexing.lexeme lexbuf) lexbuf
+     }
+   | char_lit {
+       string (acc ^ Lexing.lexeme lexbuf) lexbuf
+     }
+   | _ {
+       let s = Lexing.lexeme lexbuf in
+       string (acc^s) lexbuf
+     }
+
+ and ocamlyacc_comment lv acc = parse
+   | newline {
+       begin
+         inc_lnum();
+         ocamlyacc_comment lv (acc ^ "\\o") lexbuf;
+       end
+     }
+   | "/*" {
+       ocamlyacc_comment (lv+1) (acc ^ Lexing.lexeme lexbuf) lexbuf
+     }
+   | "*/" {
+       if lv = 0
+       then
+         acc ^ "*/"
+       else
+         ocamlyacc_comment (lv-1) (acc ^ Lexing.lexeme lexbuf) lexbuf
+     }
+   | "\"" {
+       let s = string "\"" lexbuf in
+       match !str_lexbuf with
+         Some lexbuf -> ocamlyacc_comment lv (acc ^ s) lexbuf
+     }
+   | char_lit {
+       ocamlyacc_comment lv (acc ^ Lexing.lexeme lexbuf) lexbuf
+     }
+   | _ {
+       let s = Lexing.lexeme lexbuf in
+       ocamlyacc_comment lv (acc^s) lexbuf
+     }
+
data/lib/langscan/ocaml/types.ml
@@ -0,0 +1,36 @@
+ (*
+   camlexer - Lexical Analyzer for Gonzui ocamlsupport
+
+   Copyright (C) 2005 Soutaro Matsumoto <matsumoto@soutaro.com>
+   All rights reserved.
+   This is free software with ABSOLUTELY NO WARRANTY.
+
+   You can redistribute it and/or modify it under the terms of
+   the GNU General Public License version 2.
+ *)
+
+ (* $Id: types.ml,v 1.1.1.1 2005/09/15 19:38:38 bashi Exp $ *)
+
+ type gonzui_type = Tident
+                  | Tpunct
+                  | Tfuncdef
+                  | Ttext
+                  | Tstring
+                  | Tcomment
+                  | Tkeyword
+                  | Tchar
+                  | Tint
+                  | Tfloat
+
+ let to_string = function
+     Tident -> "ident"
+   | Tpunct -> "punct"
+   | Tfuncdef -> "funcdef"
+   | Ttext -> "text"
+   | Tstring -> "string"
+   | Tcomment -> "comment"
+   | Tkeyword -> "keyword"
+   | Tchar -> "character"
+   | Tfloat -> "float"
+   | Tint -> "integer"
+
data/lib/langscan/perl.rb
@@ -0,0 +1,87 @@
+ #
+ # perl.rb - a Perl module of LangScan
+ #
+ # Copyright (C) 2005 Tatsuhiko Miyagawa <miyagawa@bulknews.net>
+ # All rights reserved.
+ # This is free software with ABSOLUTELY NO WARRANTY.
+ #
+ # You can redistribute it and/or modify it under the terms of
+ # the GNU General Public License version 2.
+ #
+
+ dn = "/dev/null"
+ dn = "nul" if (/mswin|mingw|bccwin/ =~ RUBY_PLATFORM)
+ unless system("perl -MPPI -e 1 2>#{dn}")
+   raise LoadError.new("PPI module is required")
+ end
+
+ require 'langscan/_common'
+
+ module LangScan
+   module Perl
+     module_function
+
+     def name
+       "Perl"
+     end
+
+     def abbrev
+       "perl"
+     end
+
+     def extnames
+       [".pl", ".PL", ".pm", ".t" ] # XXX remove ".t"
+     end
+
+     PERLTOKENIZER_PATH = $LOAD_PATH.map {|path|
+       File.join(path, "langscan/perl/tokenizer.pl")
+     }.find {|path| File.file?(path) }
+     raise "tokenizer.pl not found" if PERLTOKENIZER_PATH.nil?
+
+     def shell_escape(file_name)
+       '"' + file_name.gsub(/([$"\\`])/, "\\\\\\1") + '"'
+     end
+
+     def open_tokenizer
+       command_line = sprintf("perl %s 2>/dev/null",
+                              shell_escape(PERLTOKENIZER_PATH))
+       @io = IO.popen(command_line, "r+")
+     end
+
+     # LangScan::Perl.scan iterates over Perl program.
+     # It yields for each Fragment.
+     def scan(input)
+       open_tokenizer if @io.nil? or @io.closed? # in case of Perl error
+       @io.puts(input.length)
+       @io.write(input)
+       inputlen = input.length
+       buflen = 0
+       begin
+         while (buflen < inputlen)
+           type = @io.readline.chomp.intern
+           lineno = @io.readline.chomp.to_i
+           byteno = @io.readline.chomp.to_i
+           bodylen = @io.readline.chomp.to_i
+           text = @io.read(bodylen)
+           if type.nil? or text.nil? or lineno.nil? or byteno.nil?
+             raise ScanFailed.new("Unexpected output from tokenizer.pl")
+           end
+           yield Fragment.new(type, text, lineno, byteno)
+           @io.read(1) # newline
+           buflen += bodylen
+         end
+       rescue EOFError
+         @io.close
+         raise ScanFailed.new("tokenizer.pl failed to parse")
+       end
+     end
+
+     LangScan.register(self)
+   end
+ end
+
+
+
+
+
+
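Note: the scan method in perl.rb above streams the source to tokenizer.pl and yields one Fragment per token it reads back. A minimal usage sketch follows, assuming the gem is installed, that this file is loadable as langscan/perl, and that Fragment (defined in langscan/_common.rb, not shown in this hunk) exposes readers for the four constructor arguments:

    require 'langscan/perl'

    src = File.read("example.pl")   # any Perl source; perl with the PPI module must be available
    LangScan::Perl.scan(src) do |frag|
      # each yielded Fragment pairs a token type with its position in the input
      printf("%-8s %4d %6d %p\n", frag.type, frag.lineno, frag.byteno, frag.text)
    end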
data/lib/langscan/perl/tokenizer.pl
@@ -0,0 +1,231 @@
+ # tokenizer.pl: tokenize Perl scripts as gonzui langscan format
+ #
+ # Author: Tatsuhiko Miyagawa <miyagawa@bulknews.net>
+ # License: Same as Perl (Artistic/GPL2)
+ #
+
+ use strict;
+ use PPI::Tokenizer;
+ $PPI::Tokenizer::ALLOW_NONASCII = 1;
+
+ our $Debug = 0;
+ $| = 1;
+
+ # TODO:
+ #   'string' is abused
+ #   regexp is string
+ #   PPI fails to tokenize source code with UTF-8 binary
+
+ our(%TokenMap, %ReservedWords, %BuiltinFunctions);
+
+ if ($ARGV[0] && $ARGV[0] eq '-d') {
+     # debug mode
+     open my $fh, $ARGV[1] or die "$ARGV[1]: $!";
+     my $code = join '', <$fh>;
+     Tokenizer->new->tokenize(\$code);
+ } else {
+     # persistent mode
+     my $tokenizer = Tokenizer->new;
+     while (1) {
+         chomp(my $length = <STDIN>);
+         last unless defined $length;
+         read(STDIN, my($code), $length);
+         $tokenizer->tokenize(\$code);
+         $tokenizer->reset();
+     }
+ }
+
+ package Tokenizer;
+
+ sub new {
+     my $class = shift;
+     my $self = bless { }, $class;
+     $self->reset();
+     $self;
+ }
+
+ sub reset {
+     my $self = shift;
+     $self->{lineno} = 0;
+     $self->{byteno} = 0;
+     $self->{heredoc} = undef;
+     $self->{in_sub} = undef;
+     $self->{in_package} = undef;
+     $self->{in_arrow} = undef;
+     $self->{in_usereq} = undef;
+ }
+
+ sub tokenize {
+     my($self, $coderef) = @_;
+     my $tokenizer = PPI::Tokenizer->new($coderef) or die "Can't tokenize code: $$coderef";
+     while (my $token = $tokenizer->get_token) {
+         $self->dump_element($token);
+     }
+     my $code_length = length $$coderef;
+     $self->{byteno} == $code_length or die "Tokenize error: $self->{byteno}:$code_length";
+ }
+
+ sub dump_element {
+     my($self, $element) = @_;
+     if ($element->isa('PPI::Token::HereDoc')) {
+         $self->_dump("punct", $element->content);
+         $self->{heredoc} ||= [];
+         push @{$self->{heredoc}}, {
+             body => $element->{_heredoc},
+             eof => $element->{_terminator_line},
+         };
+         return;
+     } elsif ($self->{heredoc} && $element->isa('PPI::Token::Whitespace') && $element->content eq "\n") {
+         $self->_dump(token_name($element), $element->content);
+         for my $heredoc (@{$self->{heredoc}}) {
+             $self->_dump(string => join "", @{$heredoc->{body}});
+             $self->_dump(punct => $heredoc->{eof});
+         }
+         $self->{heredoc} = undef;
+         return;
+     } elsif ($element->isa('PPI::Token::Word') && $element->content eq 'sub') {
+         $self->{in_sub} = 1;
+     } elsif ($element->isa('PPI::Token::Word') && $element->content eq 'package') {
+         $self->{in_package} = 1;
+     } elsif ($element->isa('PPI::Token::Word') && ($element->content eq 'use' || $element->content eq 'require')) {
+         $self->{in_usereq} = 1;
+     } elsif ($element->isa('PPI::Token::Operator') && $element->content eq '->') {
+         $self->{in_arrow} = 1;
+     } elsif ($self->{in_sub} && !$element->isa('PPI::Token::Whitespace')) {
+         $self->{in_sub} = undef;
+         if ($element->isa('PPI::Token::Word')) {
+             warn "sub $element->{content}\n" if $Debug;
+             $self->_dump(fundef => $element->content);
+             return;
+         }
+     } elsif ($self->{in_package} && !$element->isa('PPI::Token::Whitespace')) {
+         $self->{in_package} = undef;
+         if ($element->isa('PPI::Token::Word')) {
+             warn "package $element->{content}\n" if $Debug;
+             $self->_dump(classdef => $element->content);
+             return;
+         }
+     } elsif ($self->{in_arrow} && !$element->isa('PPI::Token::Whitespace')) {
+         $self->{in_arrow} = undef;
+         if ($element->isa('PPI::Token::Word')) {
+             warn "->$element->{content}\n" if $Debug;
+             $self->_dump(funcall => $element->content);
+             return;
+         }
+     } elsif ($self->{in_usereq} && !$element->isa('PPI::Token::Whitespace')) {
+         $self->{in_usereq} = undef;
+         if ($element->isa('PPI::Token::Word')) {
+             warn "use $element->{content}\n" if $Debug;
+             $self->_dump(classref => $element->content);
+             return;
+         }
+     }
+     $self->_dump(token_name($element), $element->content);
+ }
+
+ sub _dump {
+     my($self, $type, $text) = @_;
+     my $bodysize = length $text;
+     print <<DUMP;
+ $type
+ $self->{lineno}
+ $self->{byteno}
+ $bodysize
+ $text
+ DUMP
+     ;
+     $self->{byteno} += $bodysize;
+     $self->{lineno} += $text =~ tr/\n//d;
+ }
+
+ sub token_name {
+     my $token = shift;
+     if ($token->isa('PPI::Token::Word')) {
+         return $ReservedWords{$token->content} ? "keyword" :
+                $BuiltinFunctions{$token->content} ? "funcall" : "word";
+     } elsif (ref($token) eq 'PPI::Token::Number') {
+         return $token->{_subtype} eq 'base256' ? "floating" : "integer";
+     }
+     $TokenMap{ref($token)} || "word";
+ }
+
+ BEGIN {
+     %TokenMap = qw(
+         PPI::Token::ArrayIndex ident
+         PPI::Token::Attribute fundef
+         PPI::Token::Cast punct
+         PPI::Token::Comment text
+         PPI::Token::DashedWord punct
+         PPI::Token::Data text
+         PPI::Token::End punct
+         PPI::Token::HereDoc *
+         PPI::Token::Label word
+         PPI::Token::Magic punct
+         PPI::Token::Number *
+         PPI::Token::Operator punct
+         PPI::Token::Pod text
+         PPI::Token::Prototype punct
+         PPI::Token::Quote::Double string
+         PPI::Token::Quote::Interpolate string
+         PPI::Token::Quote::Literal string
+         PPI::Token::Quote::Single string
+         PPI::Token::QuoteLike::Backtick string
+         PPI::Token::QuoteLike::Command string
+         PPI::Token::QuoteLike::Readline string
+         PPI::Token::QuoteLike::Regexp string
+         PPI::Token::QuoteLike::Words string
+         PPI::Token::Regexp::Match word
+         PPI::Token::Regexp::Substitute word
+         PPI::Token::Regexp::Transliterate word
+         PPI::Token::Separator punct
+         PPI::Token::Structure punct
+         PPI::Token::Symbol ident
+         PPI::Token::Unknown punct
+         PPI::Token::Whitespace punct
+         PPI::Token::Word *
+     );
+
+     # borrowed from Apache::PrettyPerl, with slight fixes
+     %ReservedWords = map { $_ => 1 } qw(
+         while until for foreach unless if elsif else do
+         package use no require import and or eq ne cmp
+         my our local next last redo goto return sub
+     );
+     %BuiltinFunctions = map { $_ => 1 } qw(
+         abs accept alarm atan2 bind binmode bless
+         caller chdir chmod chomp chop chown chr
+         chroot close closedir connect continue cos
+         crypt dbmclose dbmopen defined delete die
+         dump each endgrent endhostent endnetent
+         endprotoent endpwent endservent eof eval
+         exec exists exit exp fcntl fileno flock
+         fork format formline getc getgrent getgrgid
+         getgrnam gethostbyaddr gethostbyname gethostent
+         getlogin getnetbyaddr getnetbyname getnetent
+         getpeername getpgrp getppid getpriority
+         getprotobyname getprotobynumber getprotoent
+         getpwent getpwnam getpwuid getservbyname
+         getservbyport getservent getsockname
+         getsockopt glob gmtime goto grep hex index
+         int ioctl join keys kill last lc lcfirst
+         length link listen local localtime log
+         lstat map mkdir msgctl msgget msgrcv
+         msgsnd my next oct open opendir ord our pack
+         pipe pop pos print printf prototype push
+         quotemeta rand read readdir readline
+         readlink readpipe recv redo ref rename
+         reset return reverse rewinddir rindex
+         rmdir scalar seek seekdir select semctl
+         semget semop send setgrent sethostent
+         setnetent setpgrp setpriority setprotoent
+         setpwent setservent setsockopt shift shmctl
+         shmget shmread shmwrite shutdown sin sleep
+         socket socketpair sort splice split sprintf
+         sqrt srand stat study sub substr symlink
+         syscall sysopen sysread sysread sysseek
+         system syswrite tell telldir tie tied
+         time times truncate uc ucfirst umask undef
+         unlink unpack unshift untie utime values
+         vec wait waitpid wantarray warn write
+     );
+ }
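For reference, _dump above defines the wire format that perl.rb consumes: four header lines (token type, line number, byte offset, body length), then the raw token text followed by a newline. The standalone Ruby sketch below drives tokenizer.pl in its persistent mode the same way perl.rb does; the tokenizer path and the sample input are illustrative assumptions, and perl with the PPI module must be installed:

    # Illustrative driver; mirrors the read loop in LangScan::Perl.scan.
    tokenizer = "lib/langscan/perl/tokenizer.pl"   # hypothetical path, adjust as needed
    src = %Q{print "hello\\n";\n}

    IO.popen("perl #{tokenizer}", "r+") do |io|
      io.puts(src.length)          # persistent mode: a length line, then that many bytes of code
      io.write(src)
      seen = 0
      while seen < src.length
        type    = io.readline.chomp
        lineno  = io.readline.chomp.to_i
        byteno  = io.readline.chomp.to_i
        bodylen = io.readline.chomp.to_i
        text    = io.read(bodylen)
        io.read(1)                 # newline terminating each record
        puts "#{type} #{lineno}:#{byteno} #{text.inspect}"
        seen += bodylen
      end
    end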