langscan 1.2-x86-mswin32-60
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS.txt +19 -0
- data/History.txt +126 -0
- data/Manifest.txt +167 -0
- data/README.rdoc +91 -0
- data/Rakefile +40 -0
- data/ext/langscan/_make_c.rb +20 -0
- data/ext/langscan/_make_h.rb +30 -0
- data/ext/langscan/_template.c +134 -0
- data/ext/langscan/_template.h +53 -0
- data/ext/langscan/c/c/Makefile +188 -0
- data/ext/langscan/c/c/c.c +134 -0
- data/ext/langscan/c/c/c.h +66 -0
- data/ext/langscan/c/c/ctok.c +4629 -0
- data/ext/langscan/c/c/ctok.l +212 -0
- data/ext/langscan/c/c/extconf.rb +3 -0
- data/ext/langscan/c/c/modulename.txt +1 -0
- data/ext/langscan/c/c/tokenlist.txt +13 -0
- data/ext/langscan/csharp/csharp/Makefile +188 -0
- data/ext/langscan/csharp/csharp/csharp.c +134 -0
- data/ext/langscan/csharp/csharp/csharp.h +65 -0
- data/ext/langscan/csharp/csharp/csharptok.c +2971 -0
- data/ext/langscan/csharp/csharp/csharptok.l +200 -0
- data/ext/langscan/csharp/csharp/extconf.rb +3 -0
- data/ext/langscan/csharp/csharp/modulename.txt +1 -0
- data/ext/langscan/csharp/csharp/tokenlist.txt +12 -0
- data/ext/langscan/d/d/Makefile +188 -0
- data/ext/langscan/d/d/d.c +134 -0
- data/ext/langscan/d/d/d.h +64 -0
- data/ext/langscan/d/d/dtok.c +5468 -0
- data/ext/langscan/d/d/dtok.l +282 -0
- data/ext/langscan/d/d/extconf.rb +3 -0
- data/ext/langscan/d/d/modulename.txt +1 -0
- data/ext/langscan/d/d/tokenlist.txt +11 -0
- data/ext/langscan/elisp/elisp/Makefile +188 -0
- data/ext/langscan/elisp/elisp/elisp.c +134 -0
- data/ext/langscan/elisp/elisp/elisp.h +62 -0
- data/ext/langscan/elisp/elisp/elisptok.c +2108 -0
- data/ext/langscan/elisp/elisp/elisptok.l +151 -0
- data/ext/langscan/elisp/elisp/extconf.rb +3 -0
- data/ext/langscan/elisp/elisp/modulename.txt +1 -0
- data/ext/langscan/elisp/elisp/tokenlist.txt +9 -0
- data/ext/langscan/java/java/Makefile +188 -0
- data/ext/langscan/java/java/extconf.rb +3 -0
- data/ext/langscan/java/java/java.c +134 -0
- data/ext/langscan/java/java/java.h +64 -0
- data/ext/langscan/java/java/javatok.c +2097 -0
- data/ext/langscan/java/java/javatok.l +155 -0
- data/ext/langscan/java/java/modulename.txt +1 -0
- data/ext/langscan/java/java/tokenlist.txt +11 -0
- data/ext/langscan/javascript/javascript/Makefile +188 -0
- data/ext/langscan/javascript/javascript/extconf.rb +3 -0
- data/ext/langscan/javascript/javascript/javascript.c +134 -0
- data/ext/langscan/javascript/javascript/javascript.h +63 -0
- data/ext/langscan/javascript/javascript/javascripttok.c +2058 -0
- data/ext/langscan/javascript/javascript/javascripttok.l +147 -0
- data/ext/langscan/javascript/javascript/modulename.txt +1 -0
- data/ext/langscan/javascript/javascript/tokenlist.txt +10 -0
- data/ext/langscan/pairmatcher/pairmatcher/Makefile +188 -0
- data/ext/langscan/pairmatcher/pairmatcher/extconf.rb +3 -0
- data/ext/langscan/pairmatcher/pairmatcher/pairmatcher.c +890 -0
- data/ext/langscan/php/php/Makefile +188 -0
- data/ext/langscan/php/php/extconf.rb +3 -0
- data/ext/langscan/php/php/modulename.txt +1 -0
- data/ext/langscan/php/php/php.c +134 -0
- data/ext/langscan/php/php/php.h +64 -0
- data/ext/langscan/php/php/phptok.c +2413 -0
- data/ext/langscan/php/php/phptok.l +212 -0
- data/ext/langscan/php/php/tokenlist.txt +11 -0
- data/ext/langscan/post-distclean.rb +21 -0
- data/ext/langscan/pre-config.rb +57 -0
- data/ext/langscan/python/python/Makefile +188 -0
- data/ext/langscan/python/python/extconf.rb +3 -0
- data/ext/langscan/python/python/modulename.txt +1 -0
- data/ext/langscan/python/python/python.c +134 -0
- data/ext/langscan/python/python/python.h +61 -0
- data/ext/langscan/python/python/pythontok.c +2109 -0
- data/ext/langscan/python/python/pythontok.l +155 -0
- data/ext/langscan/python/python/tokenlist.txt +8 -0
- data/ext/langscan/ruby/compat/ripper/Makefile +189 -0
- data/ext/langscan/ruby/compat/ripper/depend +1 -0
- data/ext/langscan/ruby/compat/ripper/extconf.rb +4 -0
- data/ext/langscan/ruby/compat/ripper/include/eventids1.c +251 -0
- data/ext/langscan/ruby/compat/ripper/include/eventids2.c +277 -0
- data/ext/langscan/ruby/compat/ripper/include/lex.c +138 -0
- data/ext/langscan/ruby/compat/ripper/ripper.c +14420 -0
- data/ext/langscan/scheme/scheme/Makefile +188 -0
- data/ext/langscan/scheme/scheme/extconf.rb +3 -0
- data/ext/langscan/scheme/scheme/modulename.txt +1 -0
- data/ext/langscan/scheme/scheme/scheme.c +134 -0
- data/ext/langscan/scheme/scheme/scheme.h +60 -0
- data/ext/langscan/scheme/scheme/schemetok.c +2454 -0
- data/ext/langscan/scheme/scheme/schemetok.l +177 -0
- data/ext/langscan/scheme/scheme/tokenlist.txt +7 -0
- data/ext/langscan/sh/sh/Makefile +188 -0
- data/ext/langscan/sh/sh/extconf.rb +3 -0
- data/ext/langscan/sh/sh/modulename.txt +1 -0
- data/ext/langscan/sh/sh/sh.c +134 -0
- data/ext/langscan/sh/sh/sh.h +61 -0
- data/ext/langscan/sh/sh/shtok.c +2477 -0
- data/ext/langscan/sh/sh/shtok.l +325 -0
- data/ext/langscan/sh/sh/tokenlist.txt +8 -0
- data/lib/langscan.rb +124 -0
- data/lib/langscan/_common.rb +50 -0
- data/lib/langscan/_easyscanner.rb +78 -0
- data/lib/langscan/_pairmatcher.rb +46 -0
- data/lib/langscan/_type.rb +125 -0
- data/lib/langscan/autoconf.rb +51 -0
- data/lib/langscan/automake.rb +51 -0
- data/lib/langscan/brainfuck.rb +48 -0
- data/lib/langscan/c.rb +144 -0
- data/lib/langscan/c/c.so +0 -0
- data/lib/langscan/csharp.rb +101 -0
- data/lib/langscan/csharp/csharp.so +0 -0
- data/lib/langscan/css.rb +109 -0
- data/lib/langscan/d.rb +201 -0
- data/lib/langscan/d/d.so +0 -0
- data/lib/langscan/eiffel.rb +167 -0
- data/lib/langscan/elisp.rb +132 -0
- data/lib/langscan/elisp/elisp.so +0 -0
- data/lib/langscan/io.rb +84 -0
- data/lib/langscan/java.rb +95 -0
- data/lib/langscan/java/java.so +0 -0
- data/lib/langscan/javascript.rb +97 -0
- data/lib/langscan/javascript/javascript.so +0 -0
- data/lib/langscan/lua.rb +116 -0
- data/lib/langscan/ocaml.rb +298 -0
- data/lib/langscan/ocaml/camlexer.ml +28 -0
- data/lib/langscan/ocaml/lexer.mll +230 -0
- data/lib/langscan/ocaml/types.ml +36 -0
- data/lib/langscan/pairmatcher/pairmatcher.so +0 -0
- data/lib/langscan/perl.rb +87 -0
- data/lib/langscan/perl/tokenizer.pl +231 -0
- data/lib/langscan/php.rb +80 -0
- data/lib/langscan/php/php.so +0 -0
- data/lib/langscan/python.rb +101 -0
- data/lib/langscan/python/python.so +0 -0
- data/lib/langscan/rpmspec.rb +71 -0
- data/lib/langscan/ruby.rb +164 -0
- data/lib/langscan/ruby/compat/README +5 -0
- data/lib/langscan/ruby/compat/ripper.rb +4 -0
- data/lib/langscan/ruby/compat/ripper.so +0 -0
- data/lib/langscan/ruby/compat/ripper/core.rb +918 -0
- data/lib/langscan/ruby/compat/ripper/filter.rb +70 -0
- data/lib/langscan/ruby/compat/ripper/lexer.rb +179 -0
- data/lib/langscan/ruby/compat/ripper/sexp.rb +100 -0
- data/lib/langscan/scheme.rb +160 -0
- data/lib/langscan/scheme/scheme.so +0 -0
- data/lib/langscan/sh.rb +116 -0
- data/lib/langscan/sh/sh.so +0 -0
- data/lib/langscan/text.rb +37 -0
- data/metaconfig +2 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/makemanifest.rb +21 -0
- data/setup.rb +1604 -0
- data/tasks/extconf.rake +13 -0
- data/tasks/extconf/langscan.rake +42 -0
- data/test/langscan/brainfuck/test/test_scan.rb +55 -0
- data/test/langscan/c/test/test_scan.rb +216 -0
- data/test/langscan/c/test/test_token.rb +41 -0
- data/test/langscan/csharp/test/test_scan.rb +157 -0
- data/test/langscan/css/test/test_css.rb +79 -0
- data/test/langscan/d/test/test_scan.rb +233 -0
- data/test/langscan/d/test/test_token.rb +205 -0
- data/test/langscan/eiffel/test/test_eiffel.rb +95 -0
- data/test/langscan/elisp/test/test_elisp.rb +177 -0
- data/test/langscan/io/test/test_io.rb +79 -0
- data/test/langscan/java/test/test_java.rb +74 -0
- data/test/langscan/javascript/test/test_javascript.rb +39 -0
- data/test/langscan/lua/test/test_lua.rb +69 -0
- data/test/langscan/ocaml/test/test_ocaml.rb +161 -0
- data/test/langscan/php/test/test_scan.rb +138 -0
- data/test/langscan/python/test/test_scan.rb +105 -0
- data/test/langscan/rpmspec/test/test_rpmspec.rb +51 -0
- data/test/langscan/ruby/test/test_scan.rb +71 -0
- data/test/langscan/scheme/test/test_scan.rb +198 -0
- data/test/test_helper.rb +7 -0
- data/test/test_langscan.rb +123 -0
- metadata +320 -0
@@ -0,0 +1,28 @@
|
|
1
|
+
(*
|
2
|
+
camlexer - Lexical Analyzer for Gonzui ocamlsupport
|
3
|
+
|
4
|
+
Copyright (C) 2005 Soutaro Matsumoto <matsumoto@soutaro.com>
|
5
|
+
All rights reserved.
|
6
|
+
This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
|
8
|
+
You can redistribute it and/or modify it under the terms of
|
9
|
+
the GNU General Public License version 2.
|
10
|
+
*)
|
11
|
+
|
12
|
+
(* $Id: camlexer.ml,v 1.1.1.1 2005/09/15 19:38:39 bashi Exp $ *)
|
13
|
+
|
14
|
+
(* Command-line driver: tokenizes standard input with Lexer.token and writes
   one record per token to standard output, formatted as
   line:offset:type:text.  Output is flushed after every token.  The process
   exits with status 0 once the lexer raises Lexer.EOF. *)
let main () =
  let lexbuf = Lexing.from_channel stdin in
  (* Tail-recursive pump; it only stops when Lexer.token raises. *)
  let rec pump () : unit =
    let ((line_no, byte_no), kind, text) = Lexer.token lexbuf in
    Printf.printf "%d:%d:%s:%s\n" line_no byte_no (Types.to_string kind) text;
    flush stdout;
    pump ()
  in
  try pump () with Lexer.EOF -> exit 0

let _ = main ()
|
28
|
+
|
@@ -0,0 +1,230 @@
|
|
1
|
+
(*
|
2
|
+
camlexer - Lexical Analyzer for Gonzui ocamlsupport
|
3
|
+
|
4
|
+
Copyright (C) 2005 Soutaro Matsumoto <matsumoto@soutaro.com>
|
5
|
+
All rights reserved.
|
6
|
+
This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
|
8
|
+
You can redistribute it and/or modify it under the terms of
|
9
|
+
the GNU General Public License version 2.
|
10
|
+
|
11
|
+
*)
|
12
|
+
|
13
|
+
(* $Id: lexer.mll,v 1.1.1.1 2005/09/15 19:38:39 bashi Exp $ *)
|
14
|
+
|
15
|
+
{
(* Raised by the token rule when the end of input is reached. *)
exception EOF

open Types

(* Current line number, starting at 1.  It is maintained by hand: every rule
   that consumes a line break calls inc_lnum. *)
let lnum = ref 1

let inc_lnum () = incr lnum

(* Rewinds the line counter to the first line. *)
let reset () = lnum := 1

(* Position of the lexeme being matched, as (line, offset).  The offset is
   the sum of pos_bol and pos_cnum of the lexeme start position.
   NOTE(review): nothing visible in this file updates pos_bol, so it
   presumably stays 0 and the sum equals pos_cnum - confirm. *)
let get_pos lexbuf =
  let start = Lexing.lexeme_start_p lexbuf in
  (!lnum, start.Lexing.pos_bol + start.Lexing.pos_cnum)

(* Lexbuf recorded by the string rule when it reaches the closing quote; the
   comment rules read it back to continue scanning after an embedded string. *)
let str_lexbuf : Lexing.lexbuf option ref = ref None

}
|
38
|
+
|
39
|
+
(* Basic character classes shared by the definitions and rules below. *)
let newline = "\r\n" | '\n' | '\r'
let blank = [' ' '\t']
let letter = ['A'-'Z' 'a'-'z']
let num = ['0'-'9']
(* A letter or underscore followed by letters, digits, underscores or
   single quotes. *)
let ident = (letter | '_') (letter | num | '_' | '\'')*
|
44
|
+
(* Integer literals: decimal, hexadecimal (0x), octal (0o) and binary (0b),
   each with an optional leading minus sign and underscores allowed after
   the first digit.
   The hexadecimal digit class accepts digits and the letters a-f in either
   case.  It used to read  num | [A-F] [a-f]  (an upper-case letter followed
   by a lower-case one), which rejected ordinary literals such as 0xFF. *)
let int_lit =
    ('-'? num (num | '_')*)
  | ('-'? ("0x" | "0X")
       ['0'-'9' 'A'-'F' 'a'-'f'] (['0'-'9' 'A'-'F' 'a'-'f'] | '_')*)
  | ('-'? ("0o" | "0O") ['0'-'7'] (['0'-'7'] | '_')*)
  | ('-'? ("0b" | "0B") ['0'-'1'] (['0'-'1'] | '_')*)
|
49
|
+
(* Decimal number with optional fraction and exponent.  Both optional parts
   may be absent, so this also matches plain integers; the token rule lists
   int_lit first so that those are reported as integers. *)
let float_lit =
  '-'? num (num | '_')*
  ('.' (num | '_')*)?
  (['e' 'E'] ['+' '-']? num (num | '_')*)?

(* Any single character except the single quote. *)
let regular_char = [^ '\'']

(* Backslash escapes: a one-letter escape, three decimal digits, or x
   followed by two hexadecimal digits. *)
let escape_sequence =
  '\\' ( ['\\' '\"' '\'' 'n' 't' 'b' 'r']
       | num num num
       | 'x' ['0'-'9' 'A'-'F' 'a'-'f'] ['0'-'9' 'A'-'F' 'a'-'f'] )

(* A character literal: one regular character or one escape between single
   quotes. *)
let char_lit = '\'' (regular_char | escape_sequence) '\''

(* Label name: starts with a lower-case letter. *)
let label = ['a'-'z'] (letter | num | '_' | '\'')*
|
60
|
+
(* Characters that may continue an operator symbol. *)
let operator_char =
  ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~']
let infix_symbol =
  ['=' '<' '>' '@' '|' '&' '+' '-' '*' '/' '$' '%'] operator_char*
let prefix_symbol = ['!' '?' '~'] operator_char*

(* Reserved words of the core language. *)
let keywords =
    "and" | "as" | "assert" | "asr"
  | "begin" | "class" | "constraint"
  | "do" | "done" | "downto"
  | "else" | "end" | "exception" | "external"
  | "false" | "for" | "fun" | "function" | "functor"
  | "if" | "in" | "include" | "inherit" | "initializer"
  | "land" | "lazy" | "let" | "lor" | "lsl" | "lsr" | "lxor"
  | "match" | "method" | "mod" | "module" | "mutable"
  | "new" | "object" | "of" | "open" | "or"
  | "private" | "rec" | "sig" | "struct"
  | "then" | "to" | "true" | "try" | "type"
  | "val" | "virtual" | "when" | "while" | "with"

(* Punctuation tokens of the core language. *)
let puncts =
    "!=" | "#" | "&" | "&&" | "\'" | "(" | ")" | "*" | "+" | "," | "-"
  | "-." | "->" | "." | ".." | ":" | "::" | ":=" | ":>" | ";" | ";;"
  | "<" | "<-" | "=" | ">" | ">]" | ">}" | "?" | "??"
  | "[" | "[<" | "[>" | "[|" | "]" | "_" | "`"
  | "{" | "{<" | "|" | "|]" | "}" | "~"

(* Additional tokens recognised for camlp4 sources. *)
let camlp4_keywords = "parser"
let camlp4_puncts = "<<" | "<:" | ">>" | "$" | "$$" | "$:"

(* Additional tokens recognised for ocamlyacc grammar files. *)
let ocamlyacc_keywords =
    "%token" | "%start" | "%type"
  | "%left" | "%right" | "%nonassoc" | "%prec"
let ocamlyacc_puncts = "%{" | "%}" | "%%"
let ocamlyacc_ident = '$' num+

(* Line number directive: hash, space, digits, optionally followed by a
   space and a quoted file name. *)
let linenum_directive = '#' ' ' num+ (' ' '\"' [^ '\"']* '\"')?

let built_in_constants = "false" | "true" | "()" | "[]"
|
89
|
+
|
90
|
+
(* Main entry point.  Returns the next token as ((line, offset), type, text).
   Line breaks and blanks are skipped, and any character that matches no
   other rule is silently dropped.  Raises EOF at end of input.

   Rule order matters: ocamllex takes the longest match and breaks ties in
   favour of the earliest rule.  keywords therefore precede ident, and
   int_lit precedes float_lit (which also matches plain integers).

   The label rule no longer computes the label name with String.sub; that
   value was never used. *)
rule token = parse
  | newline
      { inc_lnum ();
        token lexbuf }
  | blank +
      { token lexbuf }
  | linenum_directive
      { (get_pos lexbuf, Ttext, Lexing.lexeme lexbuf) }
  | keywords | camlp4_keywords | ocamlyacc_keywords
      { (get_pos lexbuf, Tkeyword, Lexing.lexeme lexbuf) }
  | built_in_constants
      { (get_pos lexbuf, Tkeyword, Lexing.lexeme lexbuf) }
  | "/*"
      { (* Capture the start position before the sub-lexer advances. *)
        let pos = get_pos lexbuf in
        (pos, Tcomment, ocamlyacc_comment 0 "/*" lexbuf) }
  | "(*"
      { let pos = get_pos lexbuf in
        (pos, Tcomment, comment 0 "(*" lexbuf) }
  | '\"'
      { let pos = get_pos lexbuf in
        (pos, Tstring, string "\"" lexbuf) }
  | puncts | camlp4_puncts | ocamlyacc_puncts
      { (get_pos lexbuf, Tpunct, Lexing.lexeme lexbuf) }
  | infix_symbol | prefix_symbol
      { (get_pos lexbuf, Tident, Lexing.lexeme lexbuf) }
  | ('~'|'?') label ':'
      { (* The whole labelled-argument marker is reported as one token. *)
        (get_pos lexbuf, Tident, Lexing.lexeme lexbuf) }
  | ident | ocamlyacc_ident
      { (get_pos lexbuf, Tident, Lexing.lexeme lexbuf) }
  | char_lit
      { (get_pos lexbuf, Tchar, Lexing.lexeme lexbuf) }
  | int_lit
      { (get_pos lexbuf, Tint, Lexing.lexeme lexbuf) }
  | float_lit
      { (get_pos lexbuf, Tfloat, Lexing.lexeme lexbuf) }
  | eof
      { raise EOF }
  | _
      { token lexbuf }
|
146
|
+
|
147
|
+
(* Scans the remainder of a nested comment and returns its full text.
   lv  is the current nesting depth (0 for the outermost comment);
   acc is the text accumulated so far, including the opening delimiter.
   Every line break is counted with inc_lnum and stored in the text as the
   two characters backslash and o.
   NOTE(review): the consumer of the output presumably decodes that pair
   back into a line break - confirm.

   Changes compared with the previous version:
   - The rule for a double quote preceded by a non-backslash character also
     matches a line break followed by a double quote, and being longer it
     wins over the newline rule.  That line break is now counted and encoded
     like every other one instead of being copied raw.
   - The match on str_lexbuf is exhaustive; when nothing was recorded the
     current lexbuf is used. *)
and comment lv acc = parse
  | newline
      { inc_lnum ();
        comment lv (acc ^ "\\o") lexbuf }
  | "(*"
      { comment (lv+1) (acc ^ Lexing.lexeme lexbuf) lexbuf }
  | "*)"
      { if lv = 0
        then acc ^ "*)"
        else comment (lv-1) (acc ^ Lexing.lexeme lexbuf) lexbuf }
  | ([^ '\\'] as c1) "\""
      { let lead =
          if c1 = '\n' || c1 = '\r'
          then begin inc_lnum (); "\\o" end
          else String.make 1 c1 in
        let s = string "\"" lexbuf in
        let next =
          match !str_lexbuf with
          | Some lb -> lb
          | None -> lexbuf in
        comment lv (acc ^ lead ^ s) next }
  | char_lit
      { comment lv (acc ^ Lexing.lexeme lexbuf) lexbuf }
  | _
      { comment lv (acc ^ Lexing.lexeme lexbuf) lexbuf }
|
176
|
+
|
177
|
+
(* Scans the remainder of a string literal and returns its full text.
   acc is the text accumulated so far, including the opening quote.
   Every line break is counted with inc_lnum and stored in the text as the
   two characters backslash and o.  When the closing quote is reached the
   lexbuf is recorded in str_lexbuf for the comment rules.

   Character literals are no longer recognised inside strings.  A single
   quote has no special meaning in a string literal, and the old rule had
   two bad effects: a double quote between two single quotes did not end the
   string, and a line break between two single quotes was not counted.
   Escapes are still handled by the escape_sequence rule, so text such as an
   escaped quote between single quotes accumulates exactly as before. *)
and string acc = parse
  | newline
      { inc_lnum ();
        string (acc ^ "\\o") lexbuf }
  | '\"'
      { str_lexbuf := Some lexbuf;
        acc ^ "\"" }
  | escape_sequence
      { string (acc ^ Lexing.lexeme lexbuf) lexbuf }
  | _
      { string (acc ^ Lexing.lexeme lexbuf) lexbuf }
|
200
|
+
|
201
|
+
(* Scans the remainder of a slash-star comment, as used in ocamlyacc files,
   and returns its full text.
   lv  is the current nesting depth (0 for the outermost comment);
   acc is the text accumulated so far, including the opening delimiter.
   Every line break is counted with inc_lnum and stored in the text as the
   two characters backslash and o.
   The match on str_lexbuf is now exhaustive; when nothing was recorded the
   current lexbuf is used. *)
and ocamlyacc_comment lv acc = parse
  | newline
      { inc_lnum ();
        ocamlyacc_comment lv (acc ^ "\\o") lexbuf }
  | "/*"
      { ocamlyacc_comment (lv+1) (acc ^ Lexing.lexeme lexbuf) lexbuf }
  | "*/"
      { if lv = 0
        then acc ^ "*/"
        else ocamlyacc_comment (lv-1) (acc ^ Lexing.lexeme lexbuf) lexbuf }
  | "\""
      { let s = string "\"" lexbuf in
        let next =
          match !str_lexbuf with
          | Some lb -> lb
          | None -> lexbuf in
        ocamlyacc_comment lv (acc ^ s) next }
  | char_lit
      { ocamlyacc_comment lv (acc ^ Lexing.lexeme lexbuf) lexbuf }
  | _
      { ocamlyacc_comment lv (acc ^ Lexing.lexeme lexbuf) lexbuf }
|
230
|
+
|
@@ -0,0 +1,36 @@
|
|
1
|
+
(*
|
2
|
+
camlexer - Lexical Analyzer for Gonzui ocamlsupport
|
3
|
+
|
4
|
+
Copyright (C) 2005 Soutaro Matsumoto <matsumoto@soutaro.com>
|
5
|
+
All rights reserved.
|
6
|
+
This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
|
8
|
+
You can redistribute it and/or modify it under the terms of
|
9
|
+
the GNU General Public License version 2.
|
10
|
+
*)
|
11
|
+
|
12
|
+
(* $Id: types.ml,v 1.1.1.1 2005/09/15 19:38:38 bashi Exp $ *)
|
13
|
+
|
14
|
+
(* Token categories produced by the lexer. *)
type gonzui_type =
  | Tident
  | Tpunct
  | Tfuncdef
  | Ttext
  | Tstring
  | Tcomment
  | Tkeyword
  | Tchar
  | Tint
  | Tfloat

(* Name of a token category as written to the output stream. *)
let to_string kind =
  match kind with
  | Tident   -> "ident"
  | Tpunct   -> "punct"
  | Tfuncdef -> "funcdef"
  | Ttext    -> "text"
  | Tstring  -> "string"
  | Tcomment -> "comment"
  | Tkeyword -> "keyword"
  | Tchar    -> "character"
  | Tint     -> "integer"
  | Tfloat   -> "float"
|
36
|
+
|
Binary file
|
@@ -0,0 +1,87 @@
|
|
1
|
+
#
|
2
|
+
# perl.rb - a Perl module of LangScan
|
3
|
+
#
|
4
|
+
# Copyright (C) 2005 Tatsuhiko Miyagawa <miyagawa@bulknews.net>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
# Refuse to load this scanner unless perl can load the PPI module.
# perl's stderr is sent to the platform's null device so that a missing
# module does not print diagnostics.
null_device = (/mswin|mingw|bccwin/ =~ RUBY_PLATFORM) ? "nul" : "/dev/null"
ppi_available = system("perl -MPPI -e 1 2>#{null_device}")
raise LoadError.new("PPI module is required") unless ppi_available
|
17
|
+
|
18
|
+
require 'langscan/_common'
|
19
|
+
|
20
|
+
module LangScan
  # Perl scanner. Tokenizing is delegated to a long-lived perl process that
  # runs tokenizer.pl; requests and replies are exchanged over its
  # stdin/stdout.
  module Perl
    module_function

    # Display name of the language.
    def name
      "Perl"
    end

    # Short identifier of the language.
    def abbrev
      "perl"
    end

    # File name extensions handled by this scanner.
    def extnames
      [".pl", ".PL", ".pm", ".t" ] # XXX remove ".t"
    end

    # First tokenizer.pl found on the load path.
    PERLTOKENIZER_PATH = $LOAD_PATH.map {|path|
      File.join(path, "langscan/perl/tokenizer.pl")
    }.find {|path| File.file?(path) }
    raise "tokenizer.pl not found" if PERLTOKENIZER_PATH.nil?

    # Wraps +file_name+ in double quotes, backslash-escaping the characters
    # $, ", \ and `.
    def shell_escape(file_name)
      '"' + file_name.gsub(/([$"\\`])/, "\\\\\\1") + '"'
    end

    # Starts the perl tokenizer process and keeps its pipe in @io.
    #
    # stderr is redirected to the platform's null device. It was previously
    # hard-coded to /dev/null, which does not exist on the Windows platforms
    # this file already detects when it probes for PPI.
    def open_tokenizer
      null_device =
        (/mswin|mingw|bccwin/ =~ RUBY_PLATFORM) ? "nul" : "/dev/null"
      command_line = sprintf("perl %s 2>%s",
                             shell_escape(PERLTOKENIZER_PATH), null_device)
      @io = IO.popen(command_line, "r+")
    end

    # LangScan::Perl.scan iterates over Perl program.
    # It yields for each Fragment.
    #
    # Protocol: the input size is sent on one line, followed by the input
    # itself. For every token the tokenizer replies with four lines (type,
    # line number, byte offset, body length), then the body and a newline.
    #
    # Raises ScanFailed when the reply is malformed or the tokenizer closes
    # the pipe early.
    def scan(input)
      open_tokenizer if @io.nil? or @io.closed? # in case of Perl error
      # tokenizer.pl reads the announced number of bytes and reports token
      # sizes in bytes, so the size must be a byte count. On Rubies where
      # String#length counts characters, use bytesize instead.
      inputlen = input.respond_to?(:bytesize) ? input.bytesize : input.length
      @io.puts(inputlen)
      @io.write(input)
      buflen = 0
      begin
        while (buflen < inputlen)
          type = @io.readline.chomp.intern
          lineno = @io.readline.chomp.to_i
          byteno = @io.readline.chomp.to_i
          bodylen = @io.readline.chomp.to_i
          text = @io.read(bodylen)
          if type.nil? or text.nil? or lineno.nil? or byteno.nil?
            raise ScanFailed.new("Unexpected output from tokenizer.pl")
          end
          yield Fragment.new(type, text, lineno, byteno)
          @io.read(1) # newline
          buflen += bodylen
        end
      rescue EOFError
        # The perl process died; drop the pipe so the next scan restarts it.
        @io.close
        raise ScanFailed.new("tokenizer.pl failed to parse")
      end
    end

    LangScan.register(self)
  end
end
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
|
@@ -0,0 +1,231 @@
|
|
1
|
+
# tokenizer.pl: tokenize Perl scripts as gonzui langscan format
|
2
|
+
#
|
3
|
+
# Author: Tatsuhiko Miyagawa <miyagawa@bulknews.net>
|
4
|
+
# License: Same as Perl (Artistic/GPL2)
|
5
|
+
#
|
6
|
+
|
7
|
+
use strict;
|
8
|
+
use PPI::Tokenizer;
|
9
|
+
$PPI::Tokenizer::ALLOW_NONASCII = 1;
|
10
|
+
|
11
|
+
our $Debug = 0;
|
12
|
+
$| = 1;
|
13
|
+
|
14
|
+
# TODO:
|
15
|
+
# 'string' is abused
|
16
|
+
# regexp is string
|
17
|
+
# PPI fails to tokenize source code with UTF-8 binary
|
18
|
+
|
19
|
+
our(%TokenMap, %ReservedWords, %BuiltinFunctions);
|
20
|
+
|
21
|
+
# Driver. Two modes:
#   -d FILE  debug mode: tokenize FILE once and exit.
#   default  persistent mode: repeatedly read a length line from STDIN,
#            then that many bytes of code, and tokenize them.  The loop
#            ends when STDIN reaches end of file.
if ($ARGV[0] && $ARGV[0] eq '-d') {
    # debug mode
    # Three-argument open: the file name comes from the command line and
    # must never be interpreted as a mode or a pipe.
    open my $fh, '<', $ARGV[1] or die "$ARGV[1]: $!";
    my $code = join '', <$fh>;
    Tokenizer->new->tokenize(\$code);
} else {
    # persistent mode
    my $tokenizer = Tokenizer->new;
    # Test for end of file before touching the value read.
    while (defined(my $length = <STDIN>)) {
        chomp $length;
        read(STDIN, my($code), $length);
        $tokenizer->tokenize(\$code);
        $tokenizer->reset();
    }
}
|
37
|
+
|
38
|
+
package Tokenizer;
|
39
|
+
|
40
|
+
# Constructor: returns a new tokenizer object with freshly reset state.
sub new {
    my ($class) = @_;
    my $self = bless {}, $class;
    $self->reset();
    return $self;
}
|
46
|
+
|
47
|
+
# Puts the object back into its initial state: both position counters are
# set to zero and the pending here-document list and all context flags are
# cleared.
sub reset {
    my ($self) = @_;
    $self->{$_} = 0     for qw(lineno byteno);
    $self->{$_} = undef for qw(heredoc in_sub in_package in_arrow);
    $self->{in_usereq} = undef;
}
|
57
|
+
|
58
|
+
# Tokenizes the code referenced by $coderef with PPI::Tokenizer and passes
# every token to dump_element.  Dies when the tokenizer cannot be created,
# or when the number of bytes dumped differs from the length of the code.
sub tokenize {
    my ($self, $coderef) = @_;

    my $ppi = PPI::Tokenizer->new($coderef);
    die "Can't tokenize code: $$coderef" unless $ppi;

    my $token;
    $self->dump_element($token) while $token = $ppi->get_token;

    # Every byte of the input must have been accounted for.
    my $expected = length $$coderef;
    $self->{byteno} == $expected or die "Tokenize error: $self->{byteno}:$expected";
}
|
67
|
+
|
68
|
+
# Emits one PPI token through _dump, choosing its type from context.
#
# - A here-document token is emitted as punct and its body is queued; the
#   queued bodies and terminator lines are emitted right after the next
#   newline whitespace token.
# - The words sub, package, use and require, and the operator ->, set a
#   context flag and are then emitted with their ordinary type.
# - The first non-whitespace token seen while a context flag is set clears
#   that flag; if it is a word it is emitted as fundef, classdef, funcall
#   or classref respectively.
# - Everything else is emitted with the type given by token_name.
sub dump_element {
    my ($self, $element) = @_;

    my $is_word  = $element->isa('PPI::Token::Word');
    my $is_space = $element->isa('PPI::Token::Whitespace');
    my $content  = $element->content;

    if ($element->isa('PPI::Token::HereDoc')) {
        $self->_dump("punct", $content);
        push @{ $self->{heredoc} ||= [] }, {
            body => $element->{_heredoc},
            eof  => $element->{_terminator_line},
        };
        return;
    }

    if ($self->{heredoc} && $is_space && $content eq "\n") {
        $self->_dump(token_name($element), $content);
        foreach my $pending (@{ $self->{heredoc} }) {
            $self->_dump(string => join "", @{ $pending->{body} });
            $self->_dump(punct => $pending->{eof});
        }
        $self->{heredoc} = undef;
        return;
    }

    if ($is_word && $content eq 'sub') {
        $self->{in_sub} = 1;
    } elsif ($is_word && $content eq 'package') {
        $self->{in_package} = 1;
    } elsif ($is_word && ($content eq 'use' || $content eq 'require')) {
        $self->{in_usereq} = 1;
    } elsif ($element->isa('PPI::Token::Operator') && $content eq '->') {
        $self->{in_arrow} = 1;
    } elsif (!$is_space) {
        # Context flags in priority order: flag name, emitted type, and the
        # prefix used in the debug trace.
        my @contexts = (
            [ in_sub     => 'fundef',   'sub '     ],
            [ in_package => 'classdef', 'package ' ],
            [ in_arrow   => 'funcall',  '->'       ],
            [ in_usereq  => 'classref', 'use '     ],
        );
        for my $context (@contexts) {
            my ($flag, $type, $label) = @$context;
            next unless $self->{$flag};
            $self->{$flag} = undef;
            if ($is_word) {
                warn "$label$element->{content}\n" if $Debug;
                $self->_dump($type => $content);
                return;
            }
            # Only the first active flag is consumed by a token.
            last;
        }
    }

    $self->_dump(token_name($element), $content);
}
|
125
|
+
|
126
|
+
# Writes one token record to STDOUT as five newline-terminated fields:
# type, current line number, current byte offset, length of the text, and
# the text itself.  Afterwards the byte offset advances by the length of
# the text and the line number by the number of newlines in it.
sub _dump {
    my ($self, $type, $text) = @_;
    my $bodysize = length $text;

    my @fields = ($type, $self->{lineno}, $self->{byteno}, $bodysize, $text);
    print join("\n", @fields) . "\n";

    $self->{byteno} += $bodysize;
    $self->{lineno} += ($text =~ tr/\n//);
}
|
140
|
+
|
141
|
+
# Maps a PPI token to an output type name.
# Words become keyword, funcall or word depending on %ReservedWords and
# %BuiltinFunctions.  Tokens whose class is exactly PPI::Token::Number
# become floating when their _subtype is base256, otherwise integer.
# Every other token is looked up by class in %TokenMap, with word as the
# fallback.
sub token_name {
    my ($token) = @_;
    my $class = ref $token;

    if ($token->isa('PPI::Token::Word')) {
        my $word = $token->content;
        return "keyword" if $ReservedWords{$word};
        return "funcall" if $BuiltinFunctions{$word};
        return "word";
    }

    if ($class eq 'PPI::Token::Number') {
        return $token->{_subtype} eq 'base256' ? "floating" : "integer";
    }

    return $TokenMap{$class} || "word";
}
|
151
|
+
|
152
|
+
# Lookup tables used by token_name, filled at compile time.
BEGIN {
    # Output type for each PPI token class, grouped by type.  The '*'
    # entries are classes that token_name and dump_element handle
    # themselves before this table is consulted.
    my @token_types = (
        [ ident  => qw(ArrayIndex Symbol) ],
        [ fundef => qw(Attribute) ],
        [ punct  => qw(Cast DashedWord End Magic Operator Prototype
                       Separator Structure Unknown Whitespace) ],
        [ text   => qw(Comment Data Pod) ],
        [ '*'    => qw(HereDoc Number Word) ],
        [ word   => qw(Label Regexp::Match Regexp::Substitute
                       Regexp::Transliterate) ],
        [ string => qw(Quote::Double Quote::Interpolate Quote::Literal
                       Quote::Single QuoteLike::Backtick QuoteLike::Command
                       QuoteLike::Readline QuoteLike::Regexp
                       QuoteLike::Words) ],
    );
    %TokenMap = ();
    for my $group (@token_types) {
        my ($type, @classes) = @$group;
        $TokenMap{"PPI::Token::$_"} = $type for @classes;
    }

    # borrowed from Apache::PrettyPerl, with slight fixes
    %ReservedWords = ();
    $ReservedWords{$_} = 1 for qw(
        while until for foreach unless if elsif else do
        package use no require import and or eq ne cmp
        my our local next last redo goto return sub
    );

    %BuiltinFunctions = ();
    $BuiltinFunctions{$_} = 1 for qw(
        abs accept alarm atan2 bind binmode bless
        caller chdir chmod chomp chop chown chr
        chroot close closedir connect continue cos
        crypt dbmclose dbmopen defined delete die
        dump each endgrent endhostent endnetent
        endprotoent endpwent endservent eof eval
        exec exists exit exp fcntl fileno flock
        fork format formline getc getgrent getgrgid
        getgrnam gethostbyaddr gethostbyname gethostent
        getlogin getnetbyaddr getnetbyname getnetent
        getpeername getpgrp getppid getpriority
        getprotobyname getprotobynumber getprotoent
        getpwent getpwnam getpwuid getservbyname
        getservbyport getservent getsockname
        getsockopt glob gmtime goto grep hex index
        int ioctl join keys kill last lc lcfirst
        length link listen local localtime log
        lstat map mkdir msgctl msgget msgrcv
        msgsnd my next oct open opendir ord our pack
        pipe pop pos print printf prototype push
        quotemeta rand read readdir readline
        readlink readpipe recv redo ref rename
        reset return reverse rewinddir rindex
        rmdir scalar seek seekdir select semctl
        semget semop send setgrent sethostent
        setnetent setpgrp setpriority setprotoent
        setpwent setservent setsockopt shift shmctl
        shmget shmread shmwrite shutdown sin sleep
        socket socketpair sort splice split sprintf
        sqrt srand stat study sub substr symlink
        syscall sysopen sysread sysseek
        system syswrite tell telldir tie tied
        time times truncate uc ucfirst umask undef
        unlink unpack unshift untie utime values
        vec wait waitpid wantarray warn write
    );
}
|