langscan 1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS.txt +19 -0
- data/History.txt +126 -0
- data/Manifest.txt +167 -0
- data/README.rdoc +89 -0
- data/Rakefile +40 -0
- data/ext/langscan/_make_c.rb +20 -0
- data/ext/langscan/_make_h.rb +30 -0
- data/ext/langscan/_template.c +134 -0
- data/ext/langscan/_template.h +53 -0
- data/ext/langscan/c/c/Makefile +157 -0
- data/ext/langscan/c/c/c.c +134 -0
- data/ext/langscan/c/c/c.h +66 -0
- data/ext/langscan/c/c/ctok.c +4622 -0
- data/ext/langscan/c/c/ctok.l +212 -0
- data/ext/langscan/c/c/extconf.rb +3 -0
- data/ext/langscan/c/c/modulename.txt +1 -0
- data/ext/langscan/c/c/tokenlist.txt +13 -0
- data/ext/langscan/csharp/csharp/Makefile +157 -0
- data/ext/langscan/csharp/csharp/csharp.c +134 -0
- data/ext/langscan/csharp/csharp/csharp.h +65 -0
- data/ext/langscan/csharp/csharp/csharptok.c +2965 -0
- data/ext/langscan/csharp/csharp/csharptok.l +200 -0
- data/ext/langscan/csharp/csharp/extconf.rb +3 -0
- data/ext/langscan/csharp/csharp/modulename.txt +1 -0
- data/ext/langscan/csharp/csharp/tokenlist.txt +12 -0
- data/ext/langscan/d/d/Makefile +157 -0
- data/ext/langscan/d/d/d.c +134 -0
- data/ext/langscan/d/d/d.h +64 -0
- data/ext/langscan/d/d/dtok.c +5461 -0
- data/ext/langscan/d/d/dtok.l +282 -0
- data/ext/langscan/d/d/extconf.rb +3 -0
- data/ext/langscan/d/d/modulename.txt +1 -0
- data/ext/langscan/d/d/tokenlist.txt +11 -0
- data/ext/langscan/elisp/elisp/Makefile +157 -0
- data/ext/langscan/elisp/elisp/elisp.c +134 -0
- data/ext/langscan/elisp/elisp/elisp.h +62 -0
- data/ext/langscan/elisp/elisp/elisptok.c +2101 -0
- data/ext/langscan/elisp/elisp/elisptok.l +151 -0
- data/ext/langscan/elisp/elisp/extconf.rb +3 -0
- data/ext/langscan/elisp/elisp/modulename.txt +1 -0
- data/ext/langscan/elisp/elisp/tokenlist.txt +9 -0
- data/ext/langscan/java/java/Makefile +157 -0
- data/ext/langscan/java/java/extconf.rb +3 -0
- data/ext/langscan/java/java/java.c +134 -0
- data/ext/langscan/java/java/java.h +64 -0
- data/ext/langscan/java/java/javatok.c +2090 -0
- data/ext/langscan/java/java/javatok.l +155 -0
- data/ext/langscan/java/java/modulename.txt +1 -0
- data/ext/langscan/java/java/tokenlist.txt +11 -0
- data/ext/langscan/javascript/javascript/Makefile +157 -0
- data/ext/langscan/javascript/javascript/extconf.rb +3 -0
- data/ext/langscan/javascript/javascript/javascript.c +134 -0
- data/ext/langscan/javascript/javascript/javascript.h +63 -0
- data/ext/langscan/javascript/javascript/javascripttok.c +2051 -0
- data/ext/langscan/javascript/javascript/javascripttok.l +147 -0
- data/ext/langscan/javascript/javascript/modulename.txt +1 -0
- data/ext/langscan/javascript/javascript/tokenlist.txt +10 -0
- data/ext/langscan/pairmatcher/pairmatcher/Makefile +157 -0
- data/ext/langscan/pairmatcher/pairmatcher/extconf.rb +3 -0
- data/ext/langscan/pairmatcher/pairmatcher/pairmatcher.c +890 -0
- data/ext/langscan/php/php/Makefile +157 -0
- data/ext/langscan/php/php/extconf.rb +3 -0
- data/ext/langscan/php/php/modulename.txt +1 -0
- data/ext/langscan/php/php/php.c +134 -0
- data/ext/langscan/php/php/php.h +64 -0
- data/ext/langscan/php/php/phptok.c +2406 -0
- data/ext/langscan/php/php/phptok.l +212 -0
- data/ext/langscan/php/php/tokenlist.txt +11 -0
- data/ext/langscan/post-distclean.rb +21 -0
- data/ext/langscan/pre-config.rb +57 -0
- data/ext/langscan/python/python/Makefile +157 -0
- data/ext/langscan/python/python/extconf.rb +3 -0
- data/ext/langscan/python/python/modulename.txt +1 -0
- data/ext/langscan/python/python/python.c +134 -0
- data/ext/langscan/python/python/python.h +61 -0
- data/ext/langscan/python/python/pythontok.c +2102 -0
- data/ext/langscan/python/python/pythontok.l +155 -0
- data/ext/langscan/python/python/tokenlist.txt +8 -0
- data/ext/langscan/ruby/compat/ripper/Makefile +158 -0
- data/ext/langscan/ruby/compat/ripper/depend +1 -0
- data/ext/langscan/ruby/compat/ripper/extconf.rb +4 -0
- data/ext/langscan/ruby/compat/ripper/include/eventids1.c +251 -0
- data/ext/langscan/ruby/compat/ripper/include/eventids2.c +277 -0
- data/ext/langscan/ruby/compat/ripper/include/lex.c +138 -0
- data/ext/langscan/ruby/compat/ripper/ripper.c +14420 -0
- data/ext/langscan/scheme/scheme/Makefile +157 -0
- data/ext/langscan/scheme/scheme/extconf.rb +3 -0
- data/ext/langscan/scheme/scheme/modulename.txt +1 -0
- data/ext/langscan/scheme/scheme/scheme.c +134 -0
- data/ext/langscan/scheme/scheme/scheme.h +60 -0
- data/ext/langscan/scheme/scheme/schemetok.c +2447 -0
- data/ext/langscan/scheme/scheme/schemetok.l +177 -0
- data/ext/langscan/scheme/scheme/tokenlist.txt +7 -0
- data/ext/langscan/sh/sh/Makefile +157 -0
- data/ext/langscan/sh/sh/extconf.rb +3 -0
- data/ext/langscan/sh/sh/modulename.txt +1 -0
- data/ext/langscan/sh/sh/sh.c +134 -0
- data/ext/langscan/sh/sh/sh.h +61 -0
- data/ext/langscan/sh/sh/shtok.c +2470 -0
- data/ext/langscan/sh/sh/shtok.l +325 -0
- data/ext/langscan/sh/sh/tokenlist.txt +8 -0
- data/lib/langscan.rb +124 -0
- data/lib/langscan/_common.rb +50 -0
- data/lib/langscan/_easyscanner.rb +78 -0
- data/lib/langscan/_pairmatcher.rb +46 -0
- data/lib/langscan/_type.rb +125 -0
- data/lib/langscan/autoconf.rb +51 -0
- data/lib/langscan/automake.rb +51 -0
- data/lib/langscan/brainfuck.rb +48 -0
- data/lib/langscan/c.rb +144 -0
- data/lib/langscan/csharp.rb +101 -0
- data/lib/langscan/css.rb +109 -0
- data/lib/langscan/d.rb +201 -0
- data/lib/langscan/eiffel.rb +167 -0
- data/lib/langscan/elisp.rb +132 -0
- data/lib/langscan/io.rb +84 -0
- data/lib/langscan/java.rb +95 -0
- data/lib/langscan/javascript.rb +97 -0
- data/lib/langscan/lua.rb +116 -0
- data/lib/langscan/ocaml.rb +298 -0
- data/lib/langscan/ocaml/camlexer.ml +28 -0
- data/lib/langscan/ocaml/lexer.mll +230 -0
- data/lib/langscan/ocaml/types.ml +36 -0
- data/lib/langscan/perl.rb +87 -0
- data/lib/langscan/perl/tokenizer.pl +231 -0
- data/lib/langscan/php.rb +80 -0
- data/lib/langscan/python.rb +101 -0
- data/lib/langscan/rpmspec.rb +71 -0
- data/lib/langscan/ruby.rb +164 -0
- data/lib/langscan/ruby/compat/README +5 -0
- data/lib/langscan/ruby/compat/ripper.rb +4 -0
- data/lib/langscan/ruby/compat/ripper/core.rb +918 -0
- data/lib/langscan/ruby/compat/ripper/filter.rb +70 -0
- data/lib/langscan/ruby/compat/ripper/lexer.rb +179 -0
- data/lib/langscan/ruby/compat/ripper/sexp.rb +100 -0
- data/lib/langscan/scheme.rb +160 -0
- data/lib/langscan/sh.rb +116 -0
- data/lib/langscan/text.rb +37 -0
- data/metaconfig +2 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/makemanifest.rb +21 -0
- data/setup.rb +1604 -0
- data/tasks/extconf.rake +13 -0
- data/tasks/extconf/langscan.rake +42 -0
- data/test/langscan/brainfuck/test/test_scan.rb +55 -0
- data/test/langscan/c/test/test_scan.rb +216 -0
- data/test/langscan/c/test/test_token.rb +41 -0
- data/test/langscan/csharp/test/test_scan.rb +157 -0
- data/test/langscan/css/test/test_css.rb +79 -0
- data/test/langscan/d/test/test_scan.rb +233 -0
- data/test/langscan/d/test/test_token.rb +205 -0
- data/test/langscan/eiffel/test/test_eiffel.rb +95 -0
- data/test/langscan/elisp/test/test_elisp.rb +177 -0
- data/test/langscan/io/test/test_io.rb +79 -0
- data/test/langscan/java/test/test_java.rb +74 -0
- data/test/langscan/javascript/test/test_javascript.rb +39 -0
- data/test/langscan/lua/test/test_lua.rb +69 -0
- data/test/langscan/ocaml/test/test_ocaml.rb +161 -0
- data/test/langscan/php/test/test_scan.rb +138 -0
- data/test/langscan/python/test/test_scan.rb +105 -0
- data/test/langscan/rpmspec/test/test_rpmspec.rb +51 -0
- data/test/langscan/ruby/test/test_scan.rb +71 -0
- data/test/langscan/scheme/test/test_scan.rb +198 -0
- data/test/test_helper.rb +7 -0
- data/test/test_langscan.rb +123 -0
- metadata +296 -0
data/lib/langscan/c.rb
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
#
|
2
|
+
# c.rb - a C module of LangScan
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Akira Tanaka <akr@m17n.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'langscan/c/c'
|
13
|
+
require 'langscan/_common'
|
14
|
+
require 'langscan/_pairmatcher'
|
15
|
+
|
16
|
+
module LangScan
|
17
|
+
module C
|
18
|
+
module_function
|
19
|
+
# Name of the language handled by this module, as a string.
def name
  "C/C++"
end
|
22
|
+
|
23
|
+
# Short lowercase identifier for this language module.
def abbrev
  "c"
end
|
26
|
+
|
27
|
+
# File name extensions (each with its leading dot) listed for this module.
def extnames
  %w(.c .h .cc .cpp)
end
|
30
|
+
|
31
|
+
# LangScan::C.scan walks over the C program given as +input+ and yields each
# Fragment to the block.
#
# The caller's block is wrapped with PairMatcher.fragmentsorter and that
# wrapper is what scan_unsorted calls (presumably so that fragments arrive in
# source order -- confirm in _pairmatcher).
def scan(input, &block)
  scan_unsorted(input, &PairMatcher.fragmentsorter(block))
end
|
37
|
+
|
38
|
+
# Runs LangScan::C::Tokenizer over +input+ and feeds the tokens through a
# PairMatcher that pairs up (), {}, [] and preprocessor lines ("#" .. "\n").
#
# Every fragment is yielded to the caller's block; fragments typed :ident are
# first re-typed through IdentType (:keyword, :type or :ident).  For each
# matched pair of parentheses directly preceded by a plain identifier, that
# identifier may be re-typed as :fundef, :fundecl or :funcall depending on
# the surrounding tokens.
#
# NOTE(review): the meaning of the four PairMatcher.new arguments and the
# order in which fragments reach the block are defined in _pairmatcher,
# outside this file.
def scan_unsorted(input, &block)
  matcher = LangScan::PairMatcher.new(3, 2, 2, 2)
  [:space, :comment].each {|kind| matcher.define_intertoken_fragment kind, nil }
  matcher.define_pair :paren, :punct, "(", :punct, ")"
  matcher.define_pair :brace, :punct, "{", :punct, "}"
  matcher.define_pair :bracket, :punct, "[", :punct, "]"
  matcher.define_pair :preproc, :preproc_beg, "#", :preproc_end, "\n"

  # Per-fragment callback: classify identifiers, then pass the fragment on.
  emit = lambda {|frag|
    frag.type = IdentType[frag.text] if frag.type == :ident
    yield frag
  }

  matcher.parse(LangScan::C::Tokenizer.new(input), emit) {|pair|
    next unless pair.pair_type == :paren && pair.before_open_length >= 1
    fun = pair.around_open(-1)
    next unless fun.type == :ident && IdentType[fun.text] == :ident

    # ident(...)
    outer = pair.outer
    if outer && pair.outmost.pair_type == :paren
      # type ident(type (*arg)());
      # The outermost enclosing pair is a parenthesis: leave the type alone.
    elsif outer &&
          outer.pair_type == :preproc &&
          outer.after_open_length >= 2 &&
          outer.around_open(1).type == :ident && /\Adefine\z/ =~ outer.around_open(1).text &&
          outer.around_open(2) == fun
      # #define ident(...)
      # #define ident (...)
      # Only the first form (no gap between the name and "(") is a definition.
      fun.type = :fundef if fun.end_byteno == pair.open_token.beg_byteno
    elsif !outer ||
          (!outer.outer && # extern "C" { ... }
           outer.pair_type == :brace &&
           outer.before_open_length >= 2 &&
           outer.around_open(-2).type == :ident && /\Aextern\z/ =~ outer.around_open(-2).text &&
           outer.around_open(-1).type == :string && /\A"C"\z/ =~ outer.around_open(-1).text)
      # Not nested in any pair, or directly inside an extern "C" block.
      if pair.before_open_length >= 2 &&
         pair.around_open(1).type == :punct && pair.around_open(1).text == '(' &&
         pair.around_close(-1).type == :punct && pair.around_close(-1).text == ')' &&
         (declared = pair.around_open(-2)).type == :ident && IdentType[declared.text] == :ident
        # ident ident((...))
        declared.type = :fundecl
      elsif pair.after_close_length >= 1 &&
            (follower = pair.around_close(1)).type == :punct && /\A;\z/ =~ follower.text
        # ident(...);
        fun.type = :fundecl
      elsif pair.after_close_length >= 1 &&
            (((follower = pair.around_close(1)).type == :punct && /\A\{\z/ =~ follower.text) ||
             (follower.type == :ident))
        # name(...) { ... }
        # name(...) int arg; { ... }
        # name(...) struct tag *arg; { ... }
        # name(...) typedefed_type arg; { ... }
        fun.type = :fundef
      else
        fun.type = :funcall
      end
    else
      # Nested in some other pair: a call, except for "defined(...)" that sits
      # directly on an #if line.
      unless /\Adefined\z/ =~ fun.text &&
             (outer = pair.outer) &&
             !outer.outer &&
             outer.pair_type == :preproc &&
             outer.after_open_length >= 1 &&
             /\Aif\z/ =~ outer.around_open(1).text
        fun.type = :funcall
      end
    end
  }
end
|
114
|
+
|
115
|
+
# Words that IdentType classifies as :keyword (unless also listed in Types).
Keywords = %w(
  auto break case char const continue default do double else enum extern
  float for goto if int long register return short signed sizeof static
  struct switch typedef union unsigned void volatile while
)
# Lookup table mapping every keyword to itself.
KeywordsHash = Keywords.inject({}) {|table, word| table[word] = word; table }

# Words that IdentType classifies as :type.
Types = %w(char double float int long short void)
# Lookup table mapping every type name to itself.
TypesHash = Types.inject({}) {|table, word| table[word] = word; table }

# Maps identifier text to :keyword or :type; any other text yields :ident.
# Types are entered after Keywords, so a word present in both ends up :type.
IdentType = Hash.new(:ident)
[[Keywords, :keyword], [Types, :type]].each {|words, kind|
  words.each {|word| IdentType[word] = kind }
}
|
132
|
+
|
133
|
+
# Debugging aid: yields each token returned by
# LangScan::C::Tokenizer#get_token for +input+, stopping as soon as get_token
# returns nil or false.  No PairMatcher processing is applied here.
def C.each_fragment(input)
  lexer = LangScan::C::Tokenizer.new(input)
  token = lexer.get_token
  while token
    yield token
    token = lexer.get_token
  end
end
|
140
|
+
|
141
|
+
# Pass this module to LangScan.register (defined outside this file).
LangScan.register(self)
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
@@ -0,0 +1,101 @@
|
|
1
|
+
#
|
2
|
+
# csharp.rb - a C# module of LangScan
|
3
|
+
#
|
4
|
+
# Copyright (C) 2005 Kenichi Ishibashi <bashi at dream.ie.ariake-nct.ac.jp>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'langscan/csharp/csharp'
|
13
|
+
require 'langscan/_common'
|
14
|
+
require 'langscan/_pairmatcher'
|
15
|
+
|
16
|
+
module LangScan
|
17
|
+
module Csharp
|
18
|
+
module_function
|
19
|
+
# Name of the language handled by this module, as a string.
def name
  "C#"
end
|
22
|
+
|
23
|
+
# Short lowercase identifier for this language module.
def abbrev
  "csharp"
end
|
26
|
+
|
27
|
+
# File name extensions (each with its leading dot) listed for this module.
def extnames
  %w(.cs)
end
|
30
|
+
|
31
|
+
# LangScan::Csharp.scan walks over the C# program given as +input+ and yields
# each Fragment to the block.
#
# The caller's block is wrapped with PairMatcher.fragmentsorter and that
# wrapper is what scan_unsorted calls (presumably so that fragments arrive in
# source order -- confirm in _pairmatcher).
def scan(input, &block)
  scan_unsorted(input, &PairMatcher.fragmentsorter(block))
end
|
37
|
+
|
38
|
+
# Runs LangScan::Csharp::Tokenizer over +input+ and feeds the tokens through
# a PairMatcher that pairs up parentheses.
#
# Every fragment is yielded to the caller's block.  Before that, fragments
# typed :ident are re-typed through IdentType, and fragments typed :delegate
# are turned back into :ident.  For each parenthesis pair directly preceded
# by a plain identifier, the identifier becomes :fundef when the pair has at
# most one enclosing pair and the token after ")" is "{" or ":"; in every
# other case it becomes :funcall.
#
# NOTE(review): the meaning of the four PairMatcher.new arguments is defined
# in _pairmatcher, outside this file.
def scan_unsorted(input, &block)
  matcher = LangScan::PairMatcher.new(1, 0, 0, 1)
  [:space, :comment].each {|kind| matcher.define_intertoken_fragment kind, nil }
  matcher.define_pair :paren, :punct, "(", :punct, ")"

  # Per-fragment callback: normalise the type, then pass the fragment on.
  emit = lambda {|frag|
    frag.type = IdentType[frag.text] if frag.type == :ident
    frag.type = :ident if frag.type == :delegate
    yield frag
  }

  matcher.parse(LangScan::Csharp::Tokenizer.new(input), emit) {|pair|
    next unless pair.before_open_length >= 1
    callee = pair.around_open(-1)
    next unless callee.type == :ident && IdentType[callee.text] == :ident
    next if KeywordsHash[callee.text]

    kind = :funcall
    if (!(enclosing = pair.outer) || !enclosing.outer) && pair.after_close_length >= 1
      follower = pair.around_close(1)
      # Grouping as written in the original: (punct AND "{") OR ":".
      if (follower.type == :punct && follower.text == '{') || follower.text == ':'
        kind = :fundef
      end
    end
    callee.type = kind
  }
end
|
73
|
+
|
74
|
+
# Words that IdentType classifies as :keyword (unless also listed in Types).
Keywords = %w(
  abstract as base bool break byte case catch char checked class const
  continue decimal default delegate do double else enum event explicit
  extern false finally fixed float for foreach goto if implicit in int
  interface internal is lock long namespace new null object operator
  out override params private protected public readonly ref return sbyte
  sealed short sizeof stackalloc static string struct switch this throw
  true try typeof uint ulong unchecked unsafe ushort using virtual void
  volatile while
)
# Lookup table mapping every keyword to itself.
KeywordsHash = Keywords.inject({}) {|table, word| table[word] = word; table }

# Words that IdentType classifies as :type.
Types = %w(
  bool byte char double decimal float int long sbyte short uint ulong
  ushort void
)
# Lookup table mapping every type name to itself.
TypesHash = Types.inject({}) {|table, word| table[word] = word; table }

# Maps identifier text to :keyword or :type; any other text yields :ident.
# Types are entered after Keywords, so a word present in both ends up :type.
IdentType = Hash.new(:ident)
[[Keywords, :keyword], [Types, :type]].each {|words, kind|
  words.each {|word| IdentType[word] = kind }
}
|
97
|
+
|
98
|
+
# Pass this module to LangScan.register (defined outside this file).
LangScan.register(self)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
data/lib/langscan/css.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
#
|
2
|
+
# css.rb - a CSS module of LangScan
|
3
|
+
#
|
4
|
+
# Copyright (C) 2005 Kouichirou Eto <2005 at eto.com>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'langscan/_easyscanner'
|
13
|
+
|
14
|
+
module LangScan
|
15
|
+
module CSS
|
16
|
+
module_function
|
17
|
+
# Name of the language handled by this module, as a string.
def name
  "CSS"
end
|
20
|
+
|
21
|
+
# Short lowercase identifier for this language module.
def abbrev
  "css"
end
|
24
|
+
|
25
|
+
# File name extensions (each with its leading dot) listed for this module.
def extnames
  %w(.css)
end
|
28
|
+
|
29
|
+
# Token table handed to EasyScanner.new by scan.  Each row starts with the
# fragment type, followed by one or two strings written as regular-expression
# source (backslashes are doubled so they survive the double-quoted literal).
# NOTE(review): rows with two strings look like opening/closing delimiters;
# confirm against _easyscanner.
Pattern = [
  [:comment, "/\\*", "\\*/"],
  [:string, "\"", "[^\\\\]\""],
  [:string, "\\(", "[^\\\\]\\)"],
  # "\\s" (not "\s"): inside double quotes "\s" is a literal space, which
  # would stop the pattern from matching tabs or newlines after the "!".
  [:keyword, "\\!\\s*important"],
  # [:ident, "[-@\\.\\#\\>\\w]+"],
  [:ident, "[-@\\w]+"],
  [:integer, "\\d[\\.\\w\\d%]+"],
  [:punct, "\\."],
  [:punct, "\\#"],
  [:punct, "\\{"],
  [:punct, "\\}"],
  [:punct, "\\:"],
  [:punct, "\\;"],
]

# Second argument of EasyScanner.new; no type names are listed for CSS.
Types = []

# Third argument of EasyScanner.new.
Keywords = %w(
  url
  @import
  important
)
|
52
|
+
|
53
|
+
# Walks +new_tokens+ from the last element towards the first and re-types
# every :ident or :keyword token as :fundef.  The walk stops at the first
# :punct token whose text is "}" or ";"; nil entries are skipped.
#
# Returns nil when the walk was stopped early, otherwise the index range
# that was iterated.
def goback(new_tokens)
  last_index = new_tokens.length - 1
  (0...new_tokens.length).each do |offset|
    candidate = new_tokens[last_index - offset] # take it from the last
    next unless candidate
    candidate.type = :fundef if [:ident, :keyword].include?(candidate.type)
    break if candidate.type == :punct && ["}", ";"].include?(candidate.text)
  end
end
|
68
|
+
|
69
|
+
# Looks at the incoming token +t+ together with the last token already in
# +new_tokens+.  Nothing happens unless +t+ is :punct and that last token is
# :ident.  Otherwise:
#   ":" re-types the last token as :keyword;
#   "{" hands +new_tokens+ to goback.
# Always returns nil.
def parse_token(t, new_tokens)
  previous = new_tokens.last
  if !previous.nil? && t.type == :punct && previous.type == :ident
    case t.text
    when ':' then previous.type = :keyword
    when '{' then goback(new_tokens)
    end
  end
  nil
end
|
85
|
+
|
86
|
+
# LangScan::CSS.scan walks over the CSS text given as +input+ and yields each
# Fragment to the block.
#
# Three passes: collect every token from EasyScanner, run parse_token over
# them in order (it may re-type tokens collected earlier), and only then
# yield the tokens one by one.
def scan(input, &block)
  collected = []
  EasyScanner.new(Pattern, Types, Keywords).scan(input) {|token| collected << token }

  processed = collected.inject([]) {|seen, token|
    parse_token(token, seen)
    seen << token
  }

  processed.each {|token| yield token }
end
|
106
|
+
|
107
|
+
# Pass this module to LangScan.register (defined outside this file).
LangScan.register(self)
|
108
|
+
end
|
109
|
+
end
|
data/lib/langscan/d.rb
ADDED
@@ -0,0 +1,201 @@
|
|
1
|
+
#
|
2
|
+
# d.rb - a D module of LangScan
|
3
|
+
#
|
4
|
+
# Copyright (C) 2005 Shinichiro Hamaji <hamaji@nii.ac.jp>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'langscan/d/d'
|
13
|
+
require 'langscan/_common'
|
14
|
+
require 'langscan/_pairmatcher'
|
15
|
+
|
16
|
+
module LangScan
|
17
|
+
module D
|
18
|
+
module_function
|
19
|
+
|
20
|
+
# Default for the +atp+ argument of scan and scan_unsorted.  scan_unsorted
# derives the first PairMatcher.new argument from it (3 + atp * 2, but at
# least 4).
DEFAULT_ALLOW_TEMPLATE_PARAMETERS = 3
|
21
|
+
|
22
|
+
# Name of the language handled by this module, as a string.
def name
  "D"
end
|
25
|
+
|
26
|
+
# Short lowercase identifier for this language module.
def abbrev
  "d"
end
|
29
|
+
|
30
|
+
# File name extensions (each with its leading dot) listed for this module.
def extnames
  %w(.d)
end
|
33
|
+
|
34
|
+
# True when the token +fun+ is typed :ident and IdentType also classifies
# its text as :ident (i.e. it is neither a keyword nor a type name).
def is_fun?(fun)
  return false unless fun.type == :ident
  IdentType[fun.text] == :ident
end
|
37
|
+
|
38
|
+
# Tests whether the token right after the closing half of +pair+ is the
# punctuation "{", i.e. the shape   name(...) { ... }.
#
# Returns 0 (the regexp match position) on success, false when there is no
# token after the close or it is not :punct, and nil when it is some other
# punctuation.
def is_fundef?(pair)
  return false unless pair.after_close_length >= 1
  follower = pair.around_close(1)
  return false unless follower.type == :punct
  /\A\{\z/ =~ follower.text
end
|
43
|
+
|
44
|
+
# True when the token two positions before the opening half of +pair+ exists
# and reads 'template'   (template name(...)).
def is_template?(pair)
  marker = pair.around_open(-2)
  (marker && marker.text == 'template') ? true : false
end
|
52
|
+
|
53
|
+
# True when the token two positions before the opening half of +pair+ exists
# and reads 'new'   (new name(...)).
def is_new?(pair)
  marker = pair.around_open(-2)
  (marker && marker.text == 'new') ? true : false
end
|
61
|
+
|
62
|
+
# True when +pair+ looks like part of a function-pointer-like declaration
# rather than a call or definition.  Two shapes qualify:
#
#   type (*arg)(...)   -- the first token inside the pair is "*" AND the
#                         token right after the closing half is "(";
#   delegate / function two tokens before the opening half.
#
# Both halves of the first shape must hold; checking only for the leading
# "*" would also swallow ordinary calls such as  name(*ptr);
def is_fplike?(pair)
  first_inside = pair.around_open(1)
  if first_inside && first_inside.text == '*'
    after_close = pair.around_close(1)
    # type (*arg)());
    return true if after_close && after_close.text == '('
  end

  marker = pair.around_open(-2)
  return true if marker && (marker.text == 'delegate' || marker.text == 'function')

  false
end
|
76
|
+
|
77
|
+
# Decides whether an identifier followed by "(...)" is a :fundecl or a
# :funcall by examining the enclosing pair +o+, moving outwards through
# o.outer when +o+ itself gives no answer.
#
#   * no enclosing pair                                  -> :fundecl
#   * o.pair_def[1] is not '{'                           -> :funcall
#   * no token before the "{", or one typed :classdef    -> :fundecl
#   * "{" preceded by in / out / body / unittest         -> :funcall
#   * "{" preceded by ")": scan back to the nearest "(" and look at the
#     tokens before it -- version / extern / debug / pragma defer to the
#     next outer pair, a 'template' two tokens back gives :fundecl, and
#     everything else (if, for, ...) gives :funcall
#   * otherwise ask the next outer pair
def get_funtype(o)
  return :fundecl unless o
  return :funcall unless o.pair_def[1] == '{'

  preceding = o.around_open(-1)
  return :fundecl if !preceding || preceding.type == :classdef

  case preceding.text
  when 'in', 'out', 'body', 'unittest'
    return :funcall
  when ')'
    # Walk backwards from the token before ")" until a "(" or the end of
    # the available tokens is reached.
    index = -2
    token = o.around_open(index)
    while token && token.text != '('
      index -= 1
      token = o.around_open(index)
    end

    # mismatched paren not reached, this is if or for or...
    return :funcall unless token

    introducer = o.around_open(index - 1)
    # this is if or for or...
    return :funcall unless introducer

    unless %w(version extern debug pragma).include?(introducer.text)
      before_introducer = o.around_open(index - 2)
      return :fundecl if before_introducer && before_introducer.text == 'template'
      return :funcall
    end
  end

  get_funtype(o.outer)
end
|
118
|
+
|
119
|
+
# LangScan::D.scan walks over the D program given as +input+ and yields each
# fragment to the block.
#
# +atp+ is passed through unchanged to scan_unsorted.  The caller's block is
# wrapped with PairMatcher.fragmentsorter and that wrapper is what
# scan_unsorted calls (presumably so that fragments arrive in source order --
# confirm in _pairmatcher).
def scan(input, atp = DEFAULT_ALLOW_TEMPLATE_PARAMETERS, &block)
  scan_unsorted(input, atp, &PairMatcher.fragmentsorter(block))
end
|
126
|
+
|
127
|
+
# Runs LangScan::D::Tokenizer over +input+ and feeds the tokens through a
# PairMatcher that pairs up (), {} and [].
#
# +atp+ determines the first PairMatcher.new argument: 3 + atp * 2, raised
# to 4 when smaller.
#
# Every fragment is yielded to the caller's block; fragments typed :ident are
# first re-typed through IdentType.  For each parenthesis pair directly
# preceded by a plain identifier (is_fun?), that identifier is re-typed:
#   new name(...)        -> :classref
#   template name(...)   -> :moduledef
#   function-pointer-like (is_fplike?) -> left unchanged
#   name(...) { ... }    -> :fundef
#   anything else        -> whatever get_funtype says about the outer pair
def scan_unsorted(input, atp = DEFAULT_ALLOW_TEMPLATE_PARAMETERS, &block)
  before_num = [3 + atp * 2, 4].max

  matcher = LangScan::PairMatcher.new(before_num, 2, 2, 2)
  [:space, :preproc, :comment].each {|kind| matcher.define_intertoken_fragment kind, nil }
  matcher.define_pair :paren, :punct, "(", :punct, ")"
  matcher.define_pair :brace, :punct, "{", :punct, "}"
  matcher.define_pair :bracket, :punct, "[", :punct, "]"

  # Per-fragment callback: classify identifiers, then pass the fragment on.
  emit = lambda {|frag|
    frag.type = IdentType[frag.text] if frag.type == :ident
    yield frag
  }

  matcher.parse(LangScan::D::Tokenizer.new(input), emit) {|pair|
    next unless pair.pair_type == :paren && pair.before_open_length >= 1
    fun = pair.around_open(-1)
    next unless is_fun?(fun)

    # ident(...)
    if is_new?(pair)
      fun.type = :classref
    elsif is_template?(pair)
      fun.type = :moduledef
    elsif is_fplike?(pair)
      # function-pointer-like: keep the identifier's type as it is
    elsif is_fundef?(pair)
      # name(...) { ... }
      fun.type = :fundef
    else
      fun.type = get_funtype(pair.outer)
    end
  }
end
|
166
|
+
|
167
|
+
# Words that IdentType classifies as :keyword (unless also listed in Types).
Keywords = %w(
  abstract alias align asm assert auto bit body break byte
  case cast catch cdouble cent cfloat char class const continue creal
  dchar debug default delegate delete deprecated do double
  else enum export extern false final finally float for foreach function
  goto idouble if ifloat import in inout int interface invariant ireal is
  long mixin module new null out override package pragma private protected
  public real return short static struct super switch synchronized
  template this ~this throw true try typedef typeid typeof ubyte
  ucent uint ulong union unittest ushort version void volatile
  wchar while with
)
# Lookup table mapping every keyword to itself.
KeywordsHash = Keywords.inject({}) {|table, word| table[word] = word; table }

# Words that IdentType classifies as :type.  Note that "bool" appears here
# but not in Keywords.
Types = %w(bool char double float int long short void)
# Lookup table mapping every type name to itself.
TypesHash = Types.inject({}) {|table, word| table[word] = word; table }

# Maps identifier text to :keyword or :type; any other text yields :ident.
# Types are entered after Keywords, so a word present in both ends up :type.
IdentType = Hash.new(:ident)
[[Keywords, :keyword], [Types, :type]].each {|words, kind|
  words.each {|word| IdentType[word] = kind }
}
|
189
|
+
|
190
|
+
# Debugging aid: yields each token returned by
# LangScan::D::Tokenizer#get_token for +input+, stopping as soon as get_token
# returns nil or false.  No PairMatcher processing is applied here.
def D.each_fragment(input)
  lexer = LangScan::D::Tokenizer.new(input)
  token = lexer.get_token
  while token
    yield token
    token = lexer.get_token
  end
end
|
197
|
+
|
198
|
+
# Pass this module to LangScan.register (defined outside this file).
LangScan.register(self)
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|