langscan 1.2-x86-mswin32-60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS.txt +19 -0
- data/History.txt +126 -0
- data/Manifest.txt +167 -0
- data/README.rdoc +91 -0
- data/Rakefile +40 -0
- data/ext/langscan/_make_c.rb +20 -0
- data/ext/langscan/_make_h.rb +30 -0
- data/ext/langscan/_template.c +134 -0
- data/ext/langscan/_template.h +53 -0
- data/ext/langscan/c/c/Makefile +188 -0
- data/ext/langscan/c/c/c.c +134 -0
- data/ext/langscan/c/c/c.h +66 -0
- data/ext/langscan/c/c/ctok.c +4629 -0
- data/ext/langscan/c/c/ctok.l +212 -0
- data/ext/langscan/c/c/extconf.rb +3 -0
- data/ext/langscan/c/c/modulename.txt +1 -0
- data/ext/langscan/c/c/tokenlist.txt +13 -0
- data/ext/langscan/csharp/csharp/Makefile +188 -0
- data/ext/langscan/csharp/csharp/csharp.c +134 -0
- data/ext/langscan/csharp/csharp/csharp.h +65 -0
- data/ext/langscan/csharp/csharp/csharptok.c +2971 -0
- data/ext/langscan/csharp/csharp/csharptok.l +200 -0
- data/ext/langscan/csharp/csharp/extconf.rb +3 -0
- data/ext/langscan/csharp/csharp/modulename.txt +1 -0
- data/ext/langscan/csharp/csharp/tokenlist.txt +12 -0
- data/ext/langscan/d/d/Makefile +188 -0
- data/ext/langscan/d/d/d.c +134 -0
- data/ext/langscan/d/d/d.h +64 -0
- data/ext/langscan/d/d/dtok.c +5468 -0
- data/ext/langscan/d/d/dtok.l +282 -0
- data/ext/langscan/d/d/extconf.rb +3 -0
- data/ext/langscan/d/d/modulename.txt +1 -0
- data/ext/langscan/d/d/tokenlist.txt +11 -0
- data/ext/langscan/elisp/elisp/Makefile +188 -0
- data/ext/langscan/elisp/elisp/elisp.c +134 -0
- data/ext/langscan/elisp/elisp/elisp.h +62 -0
- data/ext/langscan/elisp/elisp/elisptok.c +2108 -0
- data/ext/langscan/elisp/elisp/elisptok.l +151 -0
- data/ext/langscan/elisp/elisp/extconf.rb +3 -0
- data/ext/langscan/elisp/elisp/modulename.txt +1 -0
- data/ext/langscan/elisp/elisp/tokenlist.txt +9 -0
- data/ext/langscan/java/java/Makefile +188 -0
- data/ext/langscan/java/java/extconf.rb +3 -0
- data/ext/langscan/java/java/java.c +134 -0
- data/ext/langscan/java/java/java.h +64 -0
- data/ext/langscan/java/java/javatok.c +2097 -0
- data/ext/langscan/java/java/javatok.l +155 -0
- data/ext/langscan/java/java/modulename.txt +1 -0
- data/ext/langscan/java/java/tokenlist.txt +11 -0
- data/ext/langscan/javascript/javascript/Makefile +188 -0
- data/ext/langscan/javascript/javascript/extconf.rb +3 -0
- data/ext/langscan/javascript/javascript/javascript.c +134 -0
- data/ext/langscan/javascript/javascript/javascript.h +63 -0
- data/ext/langscan/javascript/javascript/javascripttok.c +2058 -0
- data/ext/langscan/javascript/javascript/javascripttok.l +147 -0
- data/ext/langscan/javascript/javascript/modulename.txt +1 -0
- data/ext/langscan/javascript/javascript/tokenlist.txt +10 -0
- data/ext/langscan/pairmatcher/pairmatcher/Makefile +188 -0
- data/ext/langscan/pairmatcher/pairmatcher/extconf.rb +3 -0
- data/ext/langscan/pairmatcher/pairmatcher/pairmatcher.c +890 -0
- data/ext/langscan/php/php/Makefile +188 -0
- data/ext/langscan/php/php/extconf.rb +3 -0
- data/ext/langscan/php/php/modulename.txt +1 -0
- data/ext/langscan/php/php/php.c +134 -0
- data/ext/langscan/php/php/php.h +64 -0
- data/ext/langscan/php/php/phptok.c +2413 -0
- data/ext/langscan/php/php/phptok.l +212 -0
- data/ext/langscan/php/php/tokenlist.txt +11 -0
- data/ext/langscan/post-distclean.rb +21 -0
- data/ext/langscan/pre-config.rb +57 -0
- data/ext/langscan/python/python/Makefile +188 -0
- data/ext/langscan/python/python/extconf.rb +3 -0
- data/ext/langscan/python/python/modulename.txt +1 -0
- data/ext/langscan/python/python/python.c +134 -0
- data/ext/langscan/python/python/python.h +61 -0
- data/ext/langscan/python/python/pythontok.c +2109 -0
- data/ext/langscan/python/python/pythontok.l +155 -0
- data/ext/langscan/python/python/tokenlist.txt +8 -0
- data/ext/langscan/ruby/compat/ripper/Makefile +189 -0
- data/ext/langscan/ruby/compat/ripper/depend +1 -0
- data/ext/langscan/ruby/compat/ripper/extconf.rb +4 -0
- data/ext/langscan/ruby/compat/ripper/include/eventids1.c +251 -0
- data/ext/langscan/ruby/compat/ripper/include/eventids2.c +277 -0
- data/ext/langscan/ruby/compat/ripper/include/lex.c +138 -0
- data/ext/langscan/ruby/compat/ripper/ripper.c +14420 -0
- data/ext/langscan/scheme/scheme/Makefile +188 -0
- data/ext/langscan/scheme/scheme/extconf.rb +3 -0
- data/ext/langscan/scheme/scheme/modulename.txt +1 -0
- data/ext/langscan/scheme/scheme/scheme.c +134 -0
- data/ext/langscan/scheme/scheme/scheme.h +60 -0
- data/ext/langscan/scheme/scheme/schemetok.c +2454 -0
- data/ext/langscan/scheme/scheme/schemetok.l +177 -0
- data/ext/langscan/scheme/scheme/tokenlist.txt +7 -0
- data/ext/langscan/sh/sh/Makefile +188 -0
- data/ext/langscan/sh/sh/extconf.rb +3 -0
- data/ext/langscan/sh/sh/modulename.txt +1 -0
- data/ext/langscan/sh/sh/sh.c +134 -0
- data/ext/langscan/sh/sh/sh.h +61 -0
- data/ext/langscan/sh/sh/shtok.c +2477 -0
- data/ext/langscan/sh/sh/shtok.l +325 -0
- data/ext/langscan/sh/sh/tokenlist.txt +8 -0
- data/lib/langscan.rb +124 -0
- data/lib/langscan/_common.rb +50 -0
- data/lib/langscan/_easyscanner.rb +78 -0
- data/lib/langscan/_pairmatcher.rb +46 -0
- data/lib/langscan/_type.rb +125 -0
- data/lib/langscan/autoconf.rb +51 -0
- data/lib/langscan/automake.rb +51 -0
- data/lib/langscan/brainfuck.rb +48 -0
- data/lib/langscan/c.rb +144 -0
- data/lib/langscan/c/c.so +0 -0
- data/lib/langscan/csharp.rb +101 -0
- data/lib/langscan/csharp/csharp.so +0 -0
- data/lib/langscan/css.rb +109 -0
- data/lib/langscan/d.rb +201 -0
- data/lib/langscan/d/d.so +0 -0
- data/lib/langscan/eiffel.rb +167 -0
- data/lib/langscan/elisp.rb +132 -0
- data/lib/langscan/elisp/elisp.so +0 -0
- data/lib/langscan/io.rb +84 -0
- data/lib/langscan/java.rb +95 -0
- data/lib/langscan/java/java.so +0 -0
- data/lib/langscan/javascript.rb +97 -0
- data/lib/langscan/javascript/javascript.so +0 -0
- data/lib/langscan/lua.rb +116 -0
- data/lib/langscan/ocaml.rb +298 -0
- data/lib/langscan/ocaml/camlexer.ml +28 -0
- data/lib/langscan/ocaml/lexer.mll +230 -0
- data/lib/langscan/ocaml/types.ml +36 -0
- data/lib/langscan/pairmatcher/pairmatcher.so +0 -0
- data/lib/langscan/perl.rb +87 -0
- data/lib/langscan/perl/tokenizer.pl +231 -0
- data/lib/langscan/php.rb +80 -0
- data/lib/langscan/php/php.so +0 -0
- data/lib/langscan/python.rb +101 -0
- data/lib/langscan/python/python.so +0 -0
- data/lib/langscan/rpmspec.rb +71 -0
- data/lib/langscan/ruby.rb +164 -0
- data/lib/langscan/ruby/compat/README +5 -0
- data/lib/langscan/ruby/compat/ripper.rb +4 -0
- data/lib/langscan/ruby/compat/ripper.so +0 -0
- data/lib/langscan/ruby/compat/ripper/core.rb +918 -0
- data/lib/langscan/ruby/compat/ripper/filter.rb +70 -0
- data/lib/langscan/ruby/compat/ripper/lexer.rb +179 -0
- data/lib/langscan/ruby/compat/ripper/sexp.rb +100 -0
- data/lib/langscan/scheme.rb +160 -0
- data/lib/langscan/scheme/scheme.so +0 -0
- data/lib/langscan/sh.rb +116 -0
- data/lib/langscan/sh/sh.so +0 -0
- data/lib/langscan/text.rb +37 -0
- data/metaconfig +2 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/makemanifest.rb +21 -0
- data/setup.rb +1604 -0
- data/tasks/extconf.rake +13 -0
- data/tasks/extconf/langscan.rake +42 -0
- data/test/langscan/brainfuck/test/test_scan.rb +55 -0
- data/test/langscan/c/test/test_scan.rb +216 -0
- data/test/langscan/c/test/test_token.rb +41 -0
- data/test/langscan/csharp/test/test_scan.rb +157 -0
- data/test/langscan/css/test/test_css.rb +79 -0
- data/test/langscan/d/test/test_scan.rb +233 -0
- data/test/langscan/d/test/test_token.rb +205 -0
- data/test/langscan/eiffel/test/test_eiffel.rb +95 -0
- data/test/langscan/elisp/test/test_elisp.rb +177 -0
- data/test/langscan/io/test/test_io.rb +79 -0
- data/test/langscan/java/test/test_java.rb +74 -0
- data/test/langscan/javascript/test/test_javascript.rb +39 -0
- data/test/langscan/lua/test/test_lua.rb +69 -0
- data/test/langscan/ocaml/test/test_ocaml.rb +161 -0
- data/test/langscan/php/test/test_scan.rb +138 -0
- data/test/langscan/python/test/test_scan.rb +105 -0
- data/test/langscan/rpmspec/test/test_rpmspec.rb +51 -0
- data/test/langscan/ruby/test/test_scan.rb +71 -0
- data/test/langscan/scheme/test/test_scan.rb +198 -0
- data/test/test_helper.rb +7 -0
- data/test/test_langscan.rb +123 -0
- metadata +320 -0
data/lib/langscan/c.rb
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
#
|
2
|
+
# c.rb - a C module of LangScan
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Akira Tanaka <akr@m17n.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'langscan/c/c'
|
13
|
+
require 'langscan/_common'
|
14
|
+
require 'langscan/_pairmatcher'
|
15
|
+
|
16
|
+
module LangScan
|
17
|
+
module C
|
18
|
+
module_function
|
19
|
+
# Display name of the language this scanner handles.
def name
  return 'C/C++'
end
|
22
|
+
|
23
|
+
# Short identifier for this scanner.
def abbrev
  return 'c'
end
|
26
|
+
|
27
|
+
# File name extensions (leading dot included) associated with this scanner.
def extnames
  %w(.c .h .cc .cpp)
end
|
30
|
+
|
31
|
+
# LangScan::C.scan walks over a C program and yields each Fragment.
#
# The caller's block is wrapped by PairMatcher.fragmentsorter and the
# resulting proc is what scan_unsorted yields to.
def scan(input, &block)
  scan_unsorted(input, &PairMatcher.fragmentsorter(block))
end
|
37
|
+
|
38
|
+
# LangScan::C.scan_unsorted tokenizes +input+ with LangScan::C::Tokenizer,
# drives it through a PairMatcher and yields every fragment to the given
# block.
#
# Fragments typed :ident are re-typed through IdentType before being
# yielded. For every "(" ... ")" pair, an identifier standing directly in
# front of the "(" may additionally be re-typed as :fundef, :fundecl or
# :funcall, depending on what surrounds the pair.
def scan_unsorted(input, &block)
  matcher = LangScan::PairMatcher.new(3,2,2,2)
  matcher.define_intertoken_fragment(:space, nil)
  matcher.define_intertoken_fragment(:comment, nil)
  matcher.define_pair(:paren, :punct, "(", :punct, ")")
  matcher.define_pair(:brace, :punct, "{", :punct, "}")
  matcher.define_pair(:bracket, :punct, "[", :punct, "]")
  matcher.define_pair(:preproc, :preproc_beg, "#", :preproc_end, "\n")

  tokenizer = LangScan::C::Tokenizer.new(input)

  # Called by the matcher with each fragment: classify plain identifiers,
  # then hand the fragment to the caller's block.
  emit = lambda do |fragment|
    fragment.type = IdentType[fragment.text] if fragment.type == :ident
    yield fragment
  end

  matcher.parse(tokenizer, emit) do |pair|
    next unless pair.pair_type == :paren
    next unless 1 <= pair.before_open_length

    callee = pair.around_open(-1)
    # Only a plain identifier (not a keyword or type name) is of interest:
    #   ident(...)
    next unless callee.type == :ident && IdentType[callee.text] == :ident

    enclosing = pair.outer
    if enclosing && pair.outmost.pair_type == :paren
      # type ident(type (*arg)());
      # Nested inside an outermost paren pair: leave the identifier alone.
      nil
    elsif enclosing &&
          enclosing.pair_type == :preproc &&
          2 <= enclosing.after_open_length &&
          enclosing.around_open(1).type == :ident && /\Adefine\z/ =~ enclosing.around_open(1).text &&
          enclosing.around_open(2) == pair.around_open(-1)
      # #define ident(...)
      # #define ident (...)
      # Only the first form (no gap between the name and "(") is a
      # function-like macro definition.
      callee.type = :fundef if pair.around_open(-1).end_byteno == pair.open_token.beg_byteno
    elsif !enclosing ||
          (!enclosing.outer && # extern "C" { ... }
           enclosing.pair_type == :brace &&
           2 <= enclosing.before_open_length &&
           enclosing.around_open(-2).type == :ident && /\Aextern\z/ =~ enclosing.around_open(-2).text &&
           enclosing.around_open(-1).type == :string && /\A"C"\z/ =~ enclosing.around_open(-1).text)
      # Not nested in anything, or directly inside an extern "C" block.
      if 2 <= pair.before_open_length &&
         pair.around_open(1).type == :punct && pair.around_open(1).text == '(' &&
         pair.around_close(-1).type == :punct && pair.around_close(-1).text == ')' &&
         pair.around_open(-2).type == :ident && IdentType[pair.around_open(-2).text] == :ident
        # ident ident((...))
        # The identifier two tokens before "(" is the declared name.
        pair.around_open(-2).type = :fundecl
      elsif 1 <= pair.after_close_length &&
            pair.around_close(1).type == :punct && /\A;\z/ =~ pair.around_close(1).text
        # ident(...);
        callee.type = :fundecl
      elsif 1 <= pair.after_close_length &&
            ((pair.around_close(1).type == :punct && /\A\{\z/ =~ pair.around_close(1).text) || # }
             (pair.around_close(1).type == :ident))
        # name(...) { ... }
        # name(...) int arg; { ... }
        # name(...) struct tag *arg; { ... }
        # name(...) typedefed_type arg; { ... }
        callee.type = :fundef
      else
        callee.type = :funcall
      end
    else
      # #if ... defined(...)
      # "defined" directly inside an un-nested preprocessor line that opens
      # with "if" is left untouched; anything else here is a call.
      preproc_defined = /\Adefined\z/ =~ callee.text &&
                        enclosing &&
                        !enclosing.outer &&
                        enclosing.pair_type == :preproc &&
                        1 <= enclosing.after_open_length &&
                        /\Aif\z/ =~ enclosing.around_open(1).text
      callee.type = :funcall unless preproc_defined
    end
  end
end
|
114
|
+
|
115
|
+
# Reserved words recognised by the C scanner.
Keywords = %w(
  auto break case char const continue default do double else enum extern
  float for goto if int long register return short signed sizeof static
  struct switch typedef union unsigned void volatile while
)
# Lookup table mapping every keyword to itself.
KeywordsHash = Keywords.inject({}) {|table, word| table[word] = word; table }

# Built-in type names; each of them is also listed in Keywords.
Types = %w(char double float int long short void)
# Lookup table mapping every type name to itself.
TypesHash = Types.inject({}) {|table, word| table[word] = word; table }

# Fragment type for an identifier's text. Text that is neither a keyword
# nor a type name falls back to :ident. Types are stored after Keywords,
# so the type names end up as :type rather than :keyword.
IdentType = Hash.new(:ident)
[[Keywords, :keyword], [Types, :type]].each do |words, kind|
  words.each {|word| IdentType[word] = kind }
end
|
132
|
+
|
133
|
+
# Debugging aid: yields every token LangScan::C::Tokenizer#get_token returns
# for +input+, stopping at the first nil/false result.
def C.each_fragment(input)
  lexer = LangScan::C::Tokenizer.new(input)
  token = lexer.get_token
  while token
    yield token
    token = lexer.get_token
  end
end
|
140
|
+
|
141
|
+
# Hand this module object (LangScan::C) to LangScan.register.
# NOTE(review): presumably this is what makes the scanner discoverable by
# name/extension - confirm against LangScan.register.
LangScan.register(self)
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
data/lib/langscan/c/c.so
ADDED
Binary file
|
@@ -0,0 +1,101 @@
|
|
1
|
+
#
|
2
|
+
# csharp.rb - a C# module of LangScan
|
3
|
+
#
|
4
|
+
# Copyright (C) 2005 Kenichi Ishibashi <bashi at dream.ie.ariake-nct.ac.jp>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'langscan/csharp/csharp'
|
13
|
+
require 'langscan/_common'
|
14
|
+
require 'langscan/_pairmatcher'
|
15
|
+
|
16
|
+
module LangScan
|
17
|
+
module Csharp
|
18
|
+
module_function
|
19
|
+
# Display name of the language this scanner handles.
def name
  return 'C#'
end
|
22
|
+
|
23
|
+
# Short identifier for this scanner.
def abbrev
  return 'csharp'
end
|
26
|
+
|
27
|
+
# File name extensions (leading dot included) associated with this scanner.
def extnames
  %w(.cs)
end
|
30
|
+
|
31
|
+
# LangScan::Csharp.scan walks over a C# program and yields each Fragment.
#
# The caller's block is wrapped by PairMatcher.fragmentsorter and the
# resulting proc is what scan_unsorted yields to.
def scan(input, &block)
  scan_unsorted(input, &PairMatcher.fragmentsorter(block))
end
|
37
|
+
|
38
|
+
# LangScan::Csharp.scan_unsorted tokenizes +input+ with
# LangScan::Csharp::Tokenizer, drives it through a PairMatcher that only
# tracks "(" ... ")" pairs, and yields every fragment to the given block.
#
# Fragments typed :ident are re-typed through IdentType, and fragments typed
# :delegate are turned back into :ident, before being yielded.
#
# For each paren pair, a plain identifier directly before "(" becomes
#   :fundef  - when the pair is nested in at most one other paren pair and
#              the token after ")" is the punctuation "{" or ":";
#   :funcall - in every other case.
def scan_unsorted(input, &block)
  pm = LangScan::PairMatcher.new(1,0,0,1)
  pm.define_intertoken_fragment :space, nil
  pm.define_intertoken_fragment :comment, nil
  pm.define_pair :paren, :punct, "(", :punct, ")"
  pm.parse(LangScan::Csharp::Tokenizer.new(input), lambda {|f|
    if f.type == :ident
      f.type = IdentType[f.text]
    end
    if f.type == :delegate
      f.type = :ident
    end
    yield f
  }) {|pair|
    if 1 <= pair.before_open_length
      callee = pair.around_open(-1)
      if callee.type == :ident &&
         IdentType[callee.text] == :ident &&
         !KeywordsHash[callee.text]
        outer = pair.outer
        # Only look at the token after ")" when there is one.
        follower = (1 <= pair.after_close_length) ? pair.around_close(1) : nil
        # Both "{" and ":" must be punctuation tokens. The alternatives are
        # parenthesized explicitly: without the parentheses `&&` binds
        # tighter than `||` and the ":" case would skip the type check.
        if (!outer || !outer.outer) &&
           follower &&
           follower.type == :punct &&
           (follower.text == '{' || follower.text == ':')
          callee.type = :fundef
        else
          callee.type = :funcall
        end
      end
    end
  }
end
|
73
|
+
|
74
|
+
# Reserved words recognised by the C# scanner.
Keywords = %w(
  abstract as base bool break byte case catch char checked class const
  continue decimal default delegate do double else enum event explicit
  extern false finally fixed float for foreach goto if implicit in int
  interface internal is lock long namespace new null object operator
  out override params private protected public readonly ref return sbyte
  sealed short sizeof stackalloc static string struct switch this throw
  true try typeof uint ulong unchecked unsafe ushort using virtual void
  volatile while
)
# Lookup table mapping every keyword to itself.
KeywordsHash = Keywords.inject({}) {|table, word| table[word] = word; table }

# Built-in type names; each of them is also listed in Keywords.
Types = %w(
  bool byte char double decimal float int long sbyte short uint ulong
  ushort void
)
# Lookup table mapping every type name to itself.
TypesHash = Types.inject({}) {|table, word| table[word] = word; table }

# Fragment type for an identifier's text. Text that is neither a keyword
# nor a type name falls back to :ident. Types are stored after Keywords,
# so the type names end up as :type rather than :keyword.
IdentType = Hash.new(:ident)
[[Keywords, :keyword], [Types, :type]].each do |words, kind|
  words.each {|word| IdentType[word] = kind }
end
|
97
|
+
|
98
|
+
# Hand this module object (LangScan::Csharp) to LangScan.register.
# NOTE(review): presumably this is what makes the scanner discoverable by
# name/extension - confirm against LangScan.register.
LangScan.register(self)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
Binary file
|
data/lib/langscan/css.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
#
|
2
|
+
# css.rb - a CSS module of LangScan
|
3
|
+
#
|
4
|
+
# Copyright (C) 2005 Kouichirou Eto <2005 at eto.com>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'langscan/_easyscanner'
|
13
|
+
|
14
|
+
module LangScan
|
15
|
+
module CSS
|
16
|
+
module_function
|
17
|
+
# Display name of the language this scanner handles.
def name
  return 'CSS'
end
|
20
|
+
|
21
|
+
# Short identifier for this scanner.
def abbrev
  return 'css'
end
|
24
|
+
|
25
|
+
# File name extensions (leading dot included) associated with this scanner.
def extnames
  %w(.css)
end
|
28
|
+
|
29
|
+
# Token table passed to EasyScanner.new by scan.
#
# Every entry starts with the fragment type, followed by one or two pattern
# strings.
# NOTE(review): the strings look like regular-expression sources, and the
# two-string entries like begin/end delimiters - confirm against
# EasyScanner.
Pattern = [
  [:comment, "/\\*", "\\*/"],
  [:string, "\"", "[^\\\\]\""],
  [:string, "\\(", "[^\\\\]\\)"],
  # The whitespace class has to be written "\\s": inside a double-quoted
  # Ruby string a bare "\s" is a literal space character, which would only
  # allow spaces (not tabs or newlines) between "!" and "important".
  [:keyword, "\\!\\s*important"],
  # [:ident, "[-@\\.\\#\\>\\w]+"],
  [:ident, "[-@\\w]+"],
  [:integer, "\\d[\\.\\w\\d%]+"],
  [:punct, "\\."],
  [:punct, "\\#"],
  [:punct, "\\{"],
  [:punct, "\\}"],
  [:punct, "\\:"],
  [:punct, "\\;"],
]
|
44
|
+
|
45
|
+
# Type-name list passed to EasyScanner.new by scan; CSS has none.
Types = Array.new

# Keyword list passed to EasyScanner.new by scan.
Keywords = ['url', '@import', 'important']
|
52
|
+
|
53
|
+
# Walks +new_tokens+ from the last element towards the first and re-types
# every :ident or :keyword token as :fundef. The walk stops after the first
# punctuation token whose text is "}" or ";". nil entries are skipped.
def goback(new_tokens)
  (0...new_tokens.length).each do |distance|
    candidate = new_tokens[-1 - distance] # counted from the end
    next unless candidate

    candidate.type = :fundef if [:ident, :keyword].include?(candidate.type)
    break if candidate.type == :punct && ["}", ";"].include?(candidate.text)
  end
end
|
68
|
+
|
69
|
+
# Looks at the incoming token +t+ together with the tokens collected so far
# (+new_tokens+, which does not yet contain +t+).
#
# Nothing happens unless +t+ is punctuation and the most recent collected
# token is an :ident. In that case:
#   ":"  re-types that previous token as :keyword;
#   "{"  calls goback(new_tokens).
# Always returns nil.
def parse_token(t, new_tokens)
  previous = new_tokens.last
  return if previous.nil?
  return unless t.type == :punct && previous.type == :ident

  if t.text == ':'
    previous.type = :keyword
  elsif t.text == '{'
    goback(new_tokens)
  end
  nil
end
|
85
|
+
|
86
|
+
# LangScan::CSS.scan walks over a CSS file and yields each Fragment.
#
# Works in three passes: collect every token EasyScanner produces, run
# parse_token over them in order (it may re-type tokens seen earlier), and
# only then yield the tokens to the caller's block.
def scan(input, &block)
  scanner = EasyScanner.new(Pattern, Types, Keywords)

  raw = []
  scanner.scan(input) {|token| raw.push(token) }

  processed = raw.inject([]) do |seen, token|
    parse_token(token, seen)
    seen << token
  end

  processed.each {|token| yield token }
end
|
106
|
+
|
107
|
+
# Hand this module object (LangScan::CSS) to LangScan.register.
# NOTE(review): presumably this is what makes the scanner discoverable by
# name/extension - confirm against LangScan.register.
LangScan.register(self)
|
108
|
+
end
|
109
|
+
end
|
data/lib/langscan/d.rb
ADDED
@@ -0,0 +1,201 @@
|
|
1
|
+
#
|
2
|
+
# d.rb - a D module of LangScan
|
3
|
+
#
|
4
|
+
# Copyright (C) 2005 Shinichiro Hamaji <hamaji@nii.ac.jp>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'langscan/d/d'
|
13
|
+
require 'langscan/_common'
|
14
|
+
require 'langscan/_pairmatcher'
|
15
|
+
|
16
|
+
module LangScan
|
17
|
+
module D
|
18
|
+
module_function
|
19
|
+
|
20
|
+
# Default for the +atp+ argument of scan and scan_unsorted. scan_unsorted
# derives the first PairMatcher.new argument from it (3 + atp * 2, but at
# least 4).
DEFAULT_ALLOW_TEMPLATE_PARAMETERS = 3
|
21
|
+
|
22
|
+
# Display name of the language this scanner handles.
def name
  return 'D'
end
|
25
|
+
|
26
|
+
# Short identifier for this scanner.
def abbrev
  return 'd'
end
|
29
|
+
|
30
|
+
# File name extensions (leading dot included) associated with this scanner.
def extnames
  %w(.d)
end
|
33
|
+
|
34
|
+
# True when +fun+ is an :ident token whose text is neither a keyword nor a
# type name according to IdentType.
def is_fun?(fun)
  return false unless fun.type == :ident

  IdentType[fun.text] == :ident
end
|
37
|
+
|
38
|
+
# Truthy when the token right after the pair's closing token is the
# punctuation "{", i.e. the pair looks like
#   name(...) { ... }
# Returns false when there is no such token or it is not punctuation;
# otherwise returns the result of the regexp match (0 or nil).
def is_fundef?(pair)
  return false unless 1 <= pair.after_close_length

  pair.around_close(1).type == :punct && /\A\{\z/ =~ pair.around_close(1).text
end
|
43
|
+
|
44
|
+
# True when the token two positions before the pair's opening token has the
# text "template"; false otherwise (including when there is no such token).
def is_template?(pair)
  marker = pair.around_open(-2)
  (marker && marker.text == 'template') ? true : false
end
|
52
|
+
|
53
|
+
# True when the token two positions before the pair's opening token has the
# text "new"; false otherwise (including when there is no such token).
def is_new?(pair)
  marker = pair.around_open(-2)
  (marker && marker.text == 'new') ? true : false
end
|
61
|
+
|
62
|
+
# True when the paren pair looks like part of a function-pointer style
# declaration rather than a call or definition:
#
#   type (*arg)());               - "*" right after "(" AND "(" right after ")"
#   delegate/function ident(...)  - "delegate" or "function" two tokens
#                                   before "("
#
# Returns false otherwise.
def is_fplike?(pair)
  # Both halves must hold. The trailing && matters: without it Ruby ends the
  # condition at the line break, and every "ident(*x ...)" would be treated
  # as a function pointer no matter what follows ")".
  if pair.around_open(1) && pair.around_open(1).text == '*' &&
     pair.around_close(1) && pair.around_close(1).text == '('
    # type (*arg)());
    return true
  end

  t = pair.around_open(-2)
  if (t && (t.text == 'delegate' || t.text == 'function'))
    return true
  end

  return false
end
|
76
|
+
|
77
|
+
# Decides whether an identifier in front of "(" is a declaration or a call
# by inspecting the enclosing pair +o+ (nil when there is none). Returns
# :fundecl or :funcall.
#
#   * no enclosing pair                               -> :fundecl
#   * o.pair_def[1] is not "{"                        -> :funcall
#   * nothing, or a :classdef token, before the "{"   -> :fundecl
#   * "in", "out", "body" or "unittest" before "{"    -> :funcall
#   * ")" before "{": search backwards for "(" and look at the token in
#     front of it; unless that token is version/extern/debug/pragma the
#     answer is :funcall (or :fundecl when the token one further back is
#     "template")
#   * anything else: ask the next enclosing pair.
def get_funtype(o)
  return :fundecl unless o
  return :funcall unless o.pair_def[1] == '{'

  before_brace = o.around_open(-1)
  return :fundecl unless before_brace && before_brace.type != :classdef

  word = before_brace.text
  return :funcall if %w(in out body unittest).include?(word)

  if word == ')'
    # Step back from the ")" until a "(" turns up or the tokens run out.
    pos = -2
    pos -= 1 while o.around_open(pos) && o.around_open(pos).text != '('

    # mismatched paren not reached, this is if or for or...
    return :funcall unless o.around_open(pos)

    head = o.around_open(pos - 1)
    # this is if or for or...
    return :funcall unless head

    unless %w(version extern debug pragma).include?(head.text)
      owner = o.around_open(pos - 2)
      return (owner && owner.text == 'template') ? :fundecl : :funcall
    end
  end

  get_funtype(o.outer)
end
|
118
|
+
|
119
|
+
# LangScan::D.scan walks over a D program and yields each fragment.
#
# +atp+ is passed straight through to scan_unsorted. The caller's block is
# wrapped by PairMatcher.fragmentsorter and the resulting proc is what
# scan_unsorted yields to.
def scan(input, atp = DEFAULT_ALLOW_TEMPLATE_PARAMETERS, &block)
  scan_unsorted(input, atp, &PairMatcher.fragmentsorter(block))
end
|
126
|
+
|
127
|
+
# LangScan::D.scan_unsorted tokenizes +input+ with LangScan::D::Tokenizer,
# drives it through a PairMatcher and yields every fragment to the given
# block.
#
# +atp+ sizes the first PairMatcher.new argument: 3 + atp * 2, but never
# less than 4.
# NOTE(review): presumably that argument is how many tokens before an
# opening token stay inspectable - confirm against PairMatcher.
#
# Fragments typed :ident are re-typed through IdentType before being
# yielded. For every "(" ... ")" pair, a plain identifier directly before
# the "(" is re-typed as :classref (after "new"), :moduledef (after
# "template"), :fundef (followed by "{"), or whatever get_funtype decides;
# function-pointer-like pairs are left untouched.
def scan_unsorted(input, atp = DEFAULT_ALLOW_TEMPLATE_PARAMETERS, &block)
  lookbehind = [3 + atp * 2, 4].max

  matcher = LangScan::PairMatcher.new(lookbehind,2,2,2)
  matcher.define_intertoken_fragment(:space, nil)
  matcher.define_intertoken_fragment(:preproc, nil)
  matcher.define_intertoken_fragment(:comment, nil)
  matcher.define_pair(:paren, :punct, "(", :punct, ")")
  matcher.define_pair(:brace, :punct, "{", :punct, "}")
  matcher.define_pair(:bracket, :punct, "[", :punct, "]")

  tokenizer = LangScan::D::Tokenizer.new(input)

  # Called by the matcher with each fragment: classify plain identifiers,
  # then hand the fragment to the caller's block.
  emit = lambda do |fragment|
    fragment.type = IdentType[fragment.text] if fragment.type == :ident
    yield fragment
  end

  matcher.parse(tokenizer, emit) do |pair|
    next unless pair.pair_type == :paren
    next unless 1 <= pair.before_open_length

    callee = pair.around_open(-1)
    # ident(...)
    next unless is_fun?(callee)

    if is_new?(pair)
      callee.type = :classref
    elsif is_template?(pair)
      callee.type = :moduledef
    elsif is_fplike?(pair)
      # Function pointer / delegate style: keep the identifier as it is.
      nil
    elsif is_fundef?(pair)
      # name(...) { ... }
      callee.type = :fundef
    else
      callee.type = get_funtype(pair.outer)
    end
  end
end
|
166
|
+
|
167
|
+
# Reserved words recognised by the D scanner.
Keywords = %w(
  abstract alias align asm assert auto bit body break byte case cast catch
  cdouble cent cfloat char class const continue creal dchar debug default
  delegate delete deprecated do double else enum export extern false final
  finally float for foreach function goto idouble if ifloat import in inout
  int interface invariant ireal is long mixin module new null out override
  package pragma private protected public real return short static struct
  super switch synchronized template this ~this throw true try typedef
  typeid typeof ubyte ucent uint ulong union unittest ushort version void
  volatile wchar while with
)
# Lookup table mapping every keyword to itself.
KeywordsHash = Keywords.inject({}) {|table, word| table[word] = word; table }

# Built-in type names.
Types = %w(bool char double float int long short void)
# Lookup table mapping every type name to itself.
TypesHash = Types.inject({}) {|table, word| table[word] = word; table }

# Fragment type for an identifier's text. Text that is neither a keyword
# nor a type name falls back to :ident. Types are stored after Keywords,
# so a name present in both lists ends up as :type.
IdentType = Hash.new(:ident)
[[Keywords, :keyword], [Types, :type]].each do |words, kind|
  words.each {|word| IdentType[word] = kind }
end
|
189
|
+
|
190
|
+
# Debugging aid: yields every token LangScan::D::Tokenizer#get_token returns
# for +input+, stopping at the first nil/false result.
def D.each_fragment(input)
  lexer = LangScan::D::Tokenizer.new(input)
  token = lexer.get_token
  while token
    yield token
    token = lexer.get_token
  end
end
|
197
|
+
|
198
|
+
# Hand this module object (LangScan::D) to LangScan.register.
# NOTE(review): presumably this is what makes the scanner discoverable by
# name/extension - confirm against LangScan.register.
LangScan.register(self)
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|