langscan 1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. data/AUTHORS.txt +19 -0
  2. data/History.txt +126 -0
  3. data/Manifest.txt +167 -0
  4. data/README.rdoc +89 -0
  5. data/Rakefile +40 -0
  6. data/ext/langscan/_make_c.rb +20 -0
  7. data/ext/langscan/_make_h.rb +30 -0
  8. data/ext/langscan/_template.c +134 -0
  9. data/ext/langscan/_template.h +53 -0
  10. data/ext/langscan/c/c/Makefile +157 -0
  11. data/ext/langscan/c/c/c.c +134 -0
  12. data/ext/langscan/c/c/c.h +66 -0
  13. data/ext/langscan/c/c/ctok.c +4622 -0
  14. data/ext/langscan/c/c/ctok.l +212 -0
  15. data/ext/langscan/c/c/extconf.rb +3 -0
  16. data/ext/langscan/c/c/modulename.txt +1 -0
  17. data/ext/langscan/c/c/tokenlist.txt +13 -0
  18. data/ext/langscan/csharp/csharp/Makefile +157 -0
  19. data/ext/langscan/csharp/csharp/csharp.c +134 -0
  20. data/ext/langscan/csharp/csharp/csharp.h +65 -0
  21. data/ext/langscan/csharp/csharp/csharptok.c +2965 -0
  22. data/ext/langscan/csharp/csharp/csharptok.l +200 -0
  23. data/ext/langscan/csharp/csharp/extconf.rb +3 -0
  24. data/ext/langscan/csharp/csharp/modulename.txt +1 -0
  25. data/ext/langscan/csharp/csharp/tokenlist.txt +12 -0
  26. data/ext/langscan/d/d/Makefile +157 -0
  27. data/ext/langscan/d/d/d.c +134 -0
  28. data/ext/langscan/d/d/d.h +64 -0
  29. data/ext/langscan/d/d/dtok.c +5461 -0
  30. data/ext/langscan/d/d/dtok.l +282 -0
  31. data/ext/langscan/d/d/extconf.rb +3 -0
  32. data/ext/langscan/d/d/modulename.txt +1 -0
  33. data/ext/langscan/d/d/tokenlist.txt +11 -0
  34. data/ext/langscan/elisp/elisp/Makefile +157 -0
  35. data/ext/langscan/elisp/elisp/elisp.c +134 -0
  36. data/ext/langscan/elisp/elisp/elisp.h +62 -0
  37. data/ext/langscan/elisp/elisp/elisptok.c +2101 -0
  38. data/ext/langscan/elisp/elisp/elisptok.l +151 -0
  39. data/ext/langscan/elisp/elisp/extconf.rb +3 -0
  40. data/ext/langscan/elisp/elisp/modulename.txt +1 -0
  41. data/ext/langscan/elisp/elisp/tokenlist.txt +9 -0
  42. data/ext/langscan/java/java/Makefile +157 -0
  43. data/ext/langscan/java/java/extconf.rb +3 -0
  44. data/ext/langscan/java/java/java.c +134 -0
  45. data/ext/langscan/java/java/java.h +64 -0
  46. data/ext/langscan/java/java/javatok.c +2090 -0
  47. data/ext/langscan/java/java/javatok.l +155 -0
  48. data/ext/langscan/java/java/modulename.txt +1 -0
  49. data/ext/langscan/java/java/tokenlist.txt +11 -0
  50. data/ext/langscan/javascript/javascript/Makefile +157 -0
  51. data/ext/langscan/javascript/javascript/extconf.rb +3 -0
  52. data/ext/langscan/javascript/javascript/javascript.c +134 -0
  53. data/ext/langscan/javascript/javascript/javascript.h +63 -0
  54. data/ext/langscan/javascript/javascript/javascripttok.c +2051 -0
  55. data/ext/langscan/javascript/javascript/javascripttok.l +147 -0
  56. data/ext/langscan/javascript/javascript/modulename.txt +1 -0
  57. data/ext/langscan/javascript/javascript/tokenlist.txt +10 -0
  58. data/ext/langscan/pairmatcher/pairmatcher/Makefile +157 -0
  59. data/ext/langscan/pairmatcher/pairmatcher/extconf.rb +3 -0
  60. data/ext/langscan/pairmatcher/pairmatcher/pairmatcher.c +890 -0
  61. data/ext/langscan/php/php/Makefile +157 -0
  62. data/ext/langscan/php/php/extconf.rb +3 -0
  63. data/ext/langscan/php/php/modulename.txt +1 -0
  64. data/ext/langscan/php/php/php.c +134 -0
  65. data/ext/langscan/php/php/php.h +64 -0
  66. data/ext/langscan/php/php/phptok.c +2406 -0
  67. data/ext/langscan/php/php/phptok.l +212 -0
  68. data/ext/langscan/php/php/tokenlist.txt +11 -0
  69. data/ext/langscan/post-distclean.rb +21 -0
  70. data/ext/langscan/pre-config.rb +57 -0
  71. data/ext/langscan/python/python/Makefile +157 -0
  72. data/ext/langscan/python/python/extconf.rb +3 -0
  73. data/ext/langscan/python/python/modulename.txt +1 -0
  74. data/ext/langscan/python/python/python.c +134 -0
  75. data/ext/langscan/python/python/python.h +61 -0
  76. data/ext/langscan/python/python/pythontok.c +2102 -0
  77. data/ext/langscan/python/python/pythontok.l +155 -0
  78. data/ext/langscan/python/python/tokenlist.txt +8 -0
  79. data/ext/langscan/ruby/compat/ripper/Makefile +158 -0
  80. data/ext/langscan/ruby/compat/ripper/depend +1 -0
  81. data/ext/langscan/ruby/compat/ripper/extconf.rb +4 -0
  82. data/ext/langscan/ruby/compat/ripper/include/eventids1.c +251 -0
  83. data/ext/langscan/ruby/compat/ripper/include/eventids2.c +277 -0
  84. data/ext/langscan/ruby/compat/ripper/include/lex.c +138 -0
  85. data/ext/langscan/ruby/compat/ripper/ripper.c +14420 -0
  86. data/ext/langscan/scheme/scheme/Makefile +157 -0
  87. data/ext/langscan/scheme/scheme/extconf.rb +3 -0
  88. data/ext/langscan/scheme/scheme/modulename.txt +1 -0
  89. data/ext/langscan/scheme/scheme/scheme.c +134 -0
  90. data/ext/langscan/scheme/scheme/scheme.h +60 -0
  91. data/ext/langscan/scheme/scheme/schemetok.c +2447 -0
  92. data/ext/langscan/scheme/scheme/schemetok.l +177 -0
  93. data/ext/langscan/scheme/scheme/tokenlist.txt +7 -0
  94. data/ext/langscan/sh/sh/Makefile +157 -0
  95. data/ext/langscan/sh/sh/extconf.rb +3 -0
  96. data/ext/langscan/sh/sh/modulename.txt +1 -0
  97. data/ext/langscan/sh/sh/sh.c +134 -0
  98. data/ext/langscan/sh/sh/sh.h +61 -0
  99. data/ext/langscan/sh/sh/shtok.c +2470 -0
  100. data/ext/langscan/sh/sh/shtok.l +325 -0
  101. data/ext/langscan/sh/sh/tokenlist.txt +8 -0
  102. data/lib/langscan.rb +124 -0
  103. data/lib/langscan/_common.rb +50 -0
  104. data/lib/langscan/_easyscanner.rb +78 -0
  105. data/lib/langscan/_pairmatcher.rb +46 -0
  106. data/lib/langscan/_type.rb +125 -0
  107. data/lib/langscan/autoconf.rb +51 -0
  108. data/lib/langscan/automake.rb +51 -0
  109. data/lib/langscan/brainfuck.rb +48 -0
  110. data/lib/langscan/c.rb +144 -0
  111. data/lib/langscan/csharp.rb +101 -0
  112. data/lib/langscan/css.rb +109 -0
  113. data/lib/langscan/d.rb +201 -0
  114. data/lib/langscan/eiffel.rb +167 -0
  115. data/lib/langscan/elisp.rb +132 -0
  116. data/lib/langscan/io.rb +84 -0
  117. data/lib/langscan/java.rb +95 -0
  118. data/lib/langscan/javascript.rb +97 -0
  119. data/lib/langscan/lua.rb +116 -0
  120. data/lib/langscan/ocaml.rb +298 -0
  121. data/lib/langscan/ocaml/camlexer.ml +28 -0
  122. data/lib/langscan/ocaml/lexer.mll +230 -0
  123. data/lib/langscan/ocaml/types.ml +36 -0
  124. data/lib/langscan/perl.rb +87 -0
  125. data/lib/langscan/perl/tokenizer.pl +231 -0
  126. data/lib/langscan/php.rb +80 -0
  127. data/lib/langscan/python.rb +101 -0
  128. data/lib/langscan/rpmspec.rb +71 -0
  129. data/lib/langscan/ruby.rb +164 -0
  130. data/lib/langscan/ruby/compat/README +5 -0
  131. data/lib/langscan/ruby/compat/ripper.rb +4 -0
  132. data/lib/langscan/ruby/compat/ripper/core.rb +918 -0
  133. data/lib/langscan/ruby/compat/ripper/filter.rb +70 -0
  134. data/lib/langscan/ruby/compat/ripper/lexer.rb +179 -0
  135. data/lib/langscan/ruby/compat/ripper/sexp.rb +100 -0
  136. data/lib/langscan/scheme.rb +160 -0
  137. data/lib/langscan/sh.rb +116 -0
  138. data/lib/langscan/text.rb +37 -0
  139. data/metaconfig +2 -0
  140. data/script/console +10 -0
  141. data/script/destroy +14 -0
  142. data/script/generate +14 -0
  143. data/script/makemanifest.rb +21 -0
  144. data/setup.rb +1604 -0
  145. data/tasks/extconf.rake +13 -0
  146. data/tasks/extconf/langscan.rake +42 -0
  147. data/test/langscan/brainfuck/test/test_scan.rb +55 -0
  148. data/test/langscan/c/test/test_scan.rb +216 -0
  149. data/test/langscan/c/test/test_token.rb +41 -0
  150. data/test/langscan/csharp/test/test_scan.rb +157 -0
  151. data/test/langscan/css/test/test_css.rb +79 -0
  152. data/test/langscan/d/test/test_scan.rb +233 -0
  153. data/test/langscan/d/test/test_token.rb +205 -0
  154. data/test/langscan/eiffel/test/test_eiffel.rb +95 -0
  155. data/test/langscan/elisp/test/test_elisp.rb +177 -0
  156. data/test/langscan/io/test/test_io.rb +79 -0
  157. data/test/langscan/java/test/test_java.rb +74 -0
  158. data/test/langscan/javascript/test/test_javascript.rb +39 -0
  159. data/test/langscan/lua/test/test_lua.rb +69 -0
  160. data/test/langscan/ocaml/test/test_ocaml.rb +161 -0
  161. data/test/langscan/php/test/test_scan.rb +138 -0
  162. data/test/langscan/python/test/test_scan.rb +105 -0
  163. data/test/langscan/rpmspec/test/test_rpmspec.rb +51 -0
  164. data/test/langscan/ruby/test/test_scan.rb +71 -0
  165. data/test/langscan/scheme/test/test_scan.rb +198 -0
  166. data/test/test_helper.rb +7 -0
  167. data/test/test_langscan.rb +123 -0
  168. metadata +296 -0
@@ -0,0 +1,97 @@
1
+ #
2
+ # javascript.rb - a JavaScript module of LangScan
3
+ #
4
+ # Copyright (C) 2004-2005 Keisuke Nishida <knishida@open-cobol.org>
5
+ # Copyright (C) 2005 Kouichirou Eto <2005 at eto.com>
6
+ # All rights reserved.
7
+ # This is free software with ABSOLUTELY NO WARRANTY.
8
+ #
9
+ # You can redistribute it and/or modify it under the terms of
10
+ # the GNU General Public License version 2.
11
+ #
12
+
13
+ require 'langscan/javascript/javascript'
14
+ require 'langscan/_common'
15
+ require 'langscan/_pairmatcher'
16
+
17
module LangScan
  # JavaScript support for LangScan: language metadata, the keyword tables,
  # and the scan entry points built on PairMatcher and the JavaScript
  # Tokenizer (both are defined outside this module body).
  module JavaScript
    module_function

    # Display name of the language.
    def name
      "JavaScript"
    end

    # Short name of the language.
    def abbrev
      "js"
    end

    # File name extensions associated with the language.
    def extnames
      [".js"]
    end

    # LangScan::JavaScript.scan iterates over a JavaScript program and
    # yields each Fragment. The caller's block is wrapped by
    # PairMatcher.fragmentsorter before being handed to scan_unsorted.
    def scan(input, &block)
      scan_unsorted(input, &PairMatcher.fragmentsorter(block))
    end

    # Tokenizes +input+ and yields every fragment to the given block.
    #
    # Identifier fragments are re-typed through IdentType before being
    # yielded. For each matched parenthesis pair whose preceding token is a
    # non-keyword identifier, that identifier is marked :fundef when the pair
    # has at most one enclosing pair and is directly followed by "{", and
    # :funcall otherwise.
    def scan_unsorted(input, &block)
      matcher = LangScan::PairMatcher.new(1,0,0,1)

      [:space, :comment].each do |kind|
        matcher.define_intertoken_fragment kind, nil
      end

      [
        [:paren,   "(", ")"],
        [:brace,   "{", "}"],
        [:bracket, "[", "]"],
      ].each do |pair_name, opener, closer|
        matcher.define_pair pair_name, :punct, opener, :punct, closer
      end

      classify = lambda {|fragment|
        fragment.type = IdentType[fragment.text] if fragment.type == :ident
        yield fragment
      }

      matcher.parse(LangScan::JavaScript::Tokenizer.new(input), classify) {|pair|
        if pair.pair_type == :paren && 1 <= pair.before_open_length
          callee = pair.around_open(-1)
          if callee.type == :ident &&
             IdentType[callee.text] == :ident &&
             !KeywordsHash[callee.text]
            # Only pairs at nesting depth zero or one are candidates for a
            # definition; anything deeper is always treated as a call.
            shallow = !(enclosing = pair.outer) || !enclosing.outer
            defines = shallow &&
                      1 <= pair.after_close_length &&
                      pair.around_close(1).type == :punct &&
                      pair.around_close(1).text == '{'
            callee.type = defines ? :fundef : :funcall
          end
        end
      }
    end

    # Reserved words and literals reported as :keyword.
    Keywords = %w(
      break else new var
      case finally return void
      catch for switch while
      continue function this with
      default if throw
      delete in try
      do instanceof typeof
      null true false
    )
    # Keyword lookup table (each word maps to itself).
    KeywordsHash = Keywords.inject({}) {|table, word|
      table[word] = word
      table
    }

    # No built-in type names are defined for JavaScript.
    Types = []
    TypesHash = Types.inject({}) {|table, word|
      table[word] = word
      table
    }

    # Maps identifier text to its fragment type; unknown text is :ident.
    IdentType = Hash.new(:ident)
    Keywords.each {|word| IdentType[word] = :keyword }
    Types.each {|word| IdentType[word] = :type }

    LangScan.register(self)
  end
end
97
+
@@ -0,0 +1,116 @@
1
+ #
2
+ # lua.rb - a Lua module of LangScan
3
+ #
4
+ # Copyright (C) 2005 Shinichiro Hamaji <hamaji@nii.ac.jp>
5
+ # All rights reserved.
6
+ # This is free software with ABSOLUTELY NO WARRANTY.
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the GNU General Public License version 2.
10
+ #
11
+
12
+ require 'langscan/_easyscanner'
13
+ require 'langscan/_pairmatcher'
14
+
15
module LangScan
  # Lua support for LangScan: language metadata, the token patterns handed
  # to EasyScanner, and a scan entry point that uses PairMatcher to mark
  # function definitions and calls.
  module Lua
    module_function

    # Display name of the language.
    def name
      "Lua"
    end

    # Short name of the language.
    def abbrev
      "lua"
    end

    # File name extensions associated with the language.
    def extnames
      [".lua"]
    end

    # Token patterns passed to EasyScanner: [type, regexp source] or
    # [type, opening regexp source, closing regexp source].
    #
    # The multi-character operators are listed before the single-character
    # punctuation, and "..." before "..", so that the longer operator is the
    # first candidate. NOTE(review): this assumes EasyScanner prefers earlier
    # entries, as the string and number entries above already rely on; if it
    # picks the longest match instead, the order is simply irrelevant.
    Pattern = [
      [:comment, '--.*'],
      [:string, '\\[\\[', '\\]\\]'],
      [:string, '""'],
      [:string, '"', '[^\\\\]"'],
      [:string, "''"],
      [:string, "'", "[^\\\\]'"],
      # The exponent may carry an explicit sign of either kind (1.5e+10).
      [:floating, '\\d+\\.\\d+(?:[eE][-+]?\\d+)?'],
      [:integer, '\\d+'],
      [:ident, "[a-zA-Z_]\\w*"],
      [:punct, '(?:~=|<=|>=|==|\\.\\.\\.|\\.\\.)'],
      # "-" is escaped so it is a literal character rather than forming the
      # range "+-/" (which only matched the intended characters by accident).
      [:punct, '[*+\\-/^=<>(){}\\[\\];:,\\.]'],
    ]

    # No built-in type names are defined for Lua.
    Types = []

    # Reserved words reported as :keyword.
    Keywords = %w(and break do else elseif end false for function if in local
                  nil not or repeat return then true until while)

    # Re-types an identifier token as :funcall and, when +t+ is a ":=" or "="
    # punctuation token directly following a :funcall token, promotes that
    # previous token to :fundef.
    #
    # t          - the token being examined
    # new_tokens - the tokens collected so far; only its last element is read
    #
    # Not called by scan in this module.
    def parse_token(t, new_tokens)
      if t.type == :ident
        t.type = :funcall
      end

      last_token = new_tokens.last
      return if last_token.nil?

      return unless t.type == :punct && last_token.type == :funcall

      if t.text == ':=' || t.text == '='
        last_token.type = :fundef
      end
    end

    # Scans +input+ and yields each fragment to the given block.
    #
    # All tokens are first collected from EasyScanner, then replayed through
    # PairMatcher. An identifier directly before "(" becomes :fundef when it
    # follows the keyword "function" and :funcall otherwise; in
    # "name = function(" the identifier before "=" becomes :fundef.
    def scan(input, &block)
      pm = LangScan::PairMatcher.new(3,2,2,2)
      pm.define_intertoken_fragment :comment, nil
      pm.define_pair :paren, :punct, "(", :punct, ")"
      pm.define_pair :brace, :punct, "{", :punct, "}"
      pm.define_pair :bracket, :punct, "[", :punct, "]"

      tokens = []
      scanner = EasyScanner.new(Pattern, Types, Keywords)
      scanner.scan(input) do |t|
        tokens << [t.type, t.text, t.lineno, nil, t.byteno, nil, nil, nil]
      end

      # PairMatcher#parse is given the array as its token source; expose the
      # buffered tokens through get_token, returning nil once it is empty.
      def tokens.get_token
        self.shift
      end

      pm.parse(tokens, lambda {|f|
        if f.type == :ident
          f.type = IdentType[f.text]
        end
        yield f
      }) {|pair|
        if pair.pair_type == :paren
          fun = pair.around_open(-1)
          if fun
            if fun.type == :ident
              # "function name(" defines name; any other "name(" calls it.
              f = pair.around_open(-2)
              if f && f.type == :keyword && f.text == 'function'
                fun.type = :fundef
              else
                fun.type = :funcall
              end
            elsif fun.type == :keyword && fun.text == 'function'
              # "name = function(" defines name.
              f = pair.around_open(-2)
              if f && f.type == :punct && f.text == '='
                f = pair.around_open(-3)
                if f && f.type == :ident
                  f.type = :fundef
                end
              end
            end
          end
        end
      }
    end

    # Maps identifier text to its fragment type; unknown text is :ident.
    IdentType = Hash.new(:ident)
    Keywords.each {|k| IdentType[k] = :keyword }

    LangScan.register(self)
  end
end
@@ -0,0 +1,298 @@
1
+ #
2
+ # ocaml.rb - an OCaml module of LangScan
3
+ #
4
+ # Copyright (C) 2005 Soutaro Matsumoto <matsumoto@soutaro.com>
5
+ # All rights reserved.
6
+ # This is free software with ABSOLUTELY NO WARRANTY.
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the GNU General Public License version 2.
10
+ #
11
+
12
+ require 'langscan/_common'
13
+
14
module LangScan
  # Objective Caml support for LangScan. Tokenizing is delegated to an
  # external "camlexer" program; this module then walks the token list and
  # re-types identifiers as :fundef, :fundecl or :funcall.
  module OCaml
    # Location of the camlexer helper: the first $LOAD_PATH entry that
    # contains a file "langscan/ocaml/camlexer", or nil if there is none.
    CAMLEXER_PATH = $LOAD_PATH.map{|path|
      File.join(path, "langscan/ocaml/camlexer")
    }.find {|path| File.file?(path) }

    # Raised by check_token when an index runs past the end of the token
    # list. convert_fun rescues it to stop the walk.
    class Eof < StandardError
    end

    # Feeds source text to the camlexer subprocess and turns each line of its
    # output into a Fragment.
    class Tokenizer
      # Token type names printed by camlexer, mapped to fragment types.
      SYMBOL_TBL = {
        "text" => :text,
        "ident" => :ident,
        "punct" => :punct,
        "keyword" => :keyword,
        "comment" => :comment,
        "integer" => :integer,
        "float" => :float,
        "string" => :string,
        "character" => :character,
        "funcdef" => :funcdef # not implemented yet
      }

      # Starts camlexer and a thread that writes +input+ to it line by line,
      # closing the write side when the input is exhausted.
      #
      # input - a String, an IO, or any object responding to each_line or each
      def initialize(input)
        @io = IO.popen(CAMLEXER_PATH, "r+")
        @tin = Thread.start {
          feed = lambda {|l| @io.puts(l) }
          # String has had no #each since Ruby 1.9; prefer each_line, which
          # String and IO both provide, and fall back to each otherwise.
          if input.respond_to?(:each_line)
            input.each_line(&feed)
          else
            input.each(&feed)
          end
          @io.close_write()
        }
      end

      # Waits for the writer thread, then closes the pipe to camlexer.
      def dispose()
        @tin.join()
        @io.close()
      end

      # Replaces every "\o" escape that is not preceded by a backslash with a
      # newline. The lookbehind consumes no character, so an escape at the
      # start of the string and back-to-back escapes are converted as well.
      def denormalize(str)
        str.gsub(/(?<!\\)\\o/, "\n")
      end

      # Reads one "line:column:type:text" record from camlexer and returns it
      # as a Fragment, or nil once the output is exhausted.
      def get_token()
        if @io.eof?
          nil
        else
          lno, cno, tp, wd = @io.gets().chomp().split(":",4)
          Fragment.new(SYMBOL_TBL[tp], denormalize(wd), lno.to_i(), cno.to_i())
        end
      end

    end

    module_function

    # Display name of the language.
    def name
      "Objective Caml"
    end

    # Short name of the language.
    def abbrev
      "ocaml"
    end

    # File name extensions associated with the language.
    def extnames
      [".ml", ".mli", ".mll", ".mly"]
    end

    # Tests whether the token at +index+ has the given +type+ and, when
    # +name+ is given, that text.
    #
    # Returns false for an index before the start of the list; a negative
    # index must not wrap around to the end of the array.
    # Raises Eof for an index past the end of the list.
    def check_token(tkns, index, type, name = nil)
      return false if index < 0
      t = tkns[index]
      raise Eof.new if !t
      return t.type == type && (!name || t.text == name)
    end

    # Returns the index +step+ non-comment tokens after +index+.
    # Raises Eof when the list ends first.
    def go_next(tkns, index, step)
      step.times do
        index += 1
        index += 1 while (check_token(tkns, index, :comment))
      end
      index
    end

    # check_token applied to the token +step+ non-comment tokens ahead.
    def check_token_next(tkns, index, step, type, name = nil)
      index = go_next(tkns, index, step)
      check_token(tkns, index, type, name)
    end

    # Returns the index +step+ non-comment tokens before +index+; the result
    # is negative when the start of the list is passed.
    def go_prev(tkns, index, step)
      step.times do
        index -= 1
        index -= 1 while (check_token(tkns, index, :comment))
      end
      index
    end

    # check_token applied to the token +step+ non-comment tokens back.
    def check_token_prev(tkns, index, step, type, name = nil)
      index = go_prev(tkns, index, step)
      check_token(tkns, index, type, name)
    end

    # Advances +i+ past a run of tokens that may appear in a type expression
    # ("->", ".", ":", "(", ")", "*", comments and identifiers) and returns
    # the index of the first token that is none of these.
    # Raises Eof when the list ends first.
    def skip_type(tkns, i)
      while (check_token(tkns, i, :punct, '->') ||
             check_token(tkns, i, :punct, '.') ||
             check_token(tkns, i, :punct, ':') ||
             check_token(tkns, i, :punct, '(') ||
             check_token(tkns, i, :punct, ')') ||
             check_token(tkns, i, :punct, '*') ||
             check_token(tkns, i, :comment) ||
             check_token(tkns, i, :ident))
        i += 1
      end
      i
    end

    # Skips one argument starting at +i+ and returns the index of the token
    # after it. A bracketed argument is scanned up to its closing delimiter
    # and its contents are passed through convert_fun.
    def skip_parameter(tkns, i)
      t = tkns[i]
      return i if !t
      if (t.type == :punct && (t.text == '(' || t.text =~ /^\[\|?/))
        i = go_next(tkns, i, 1)
        first = i
        del = { '(' => ')', '[' => ']', '[|' => '|]' }[t.text]
        while (!check_token(tkns, i, :punct, del))
          i = go_next(tkns, i, 1)
        end
        convert_fun(tkns, first, i)
      end
      i = go_next(tkns, i, 1)
    end

    # Tells whether the token at +index+ can start the first argument of a
    # function application.
    def is_first_parameter?(tkns, index)
      t = tkns[index]
      return false if !t

      if (t.type == :string || t.type == :character)
        return true
      end

      if (t.type == :keyword)
        return (t.text == '()')
      end

      if (t.type == :integer || t.type == :float)
        # A literal with a leading minus is not taken as an argument.
        return (t.text !~ /^-/)
      end

      if (t.type == :ident)
        return (t.text != 'array' && t.text != 'list' && t.text != 'option')
      end

      if (t.type == :punct)
        return (t.text == '(' || t.text =~ /^\[\|?/)
      end

      return false
    end

    # Tells whether the token at +i+ is preceded by "method", optionally
    # followed by "private" and/or "virtual".
    def is_method?(tkns, i)
      if (check_token_prev(tkns, i, 1, :keyword, 'virtual'))
        i = go_prev(tkns, i, 1)
      end
      if (check_token_prev(tkns, i, 1, :keyword, 'private'))
        i = go_prev(tkns, i, 1)
      end
      check_token_prev(tkns, i, 1, :keyword, 'method')
    end

    # Tells whether the token at +index+ can continue an argument list:
    # anything accepted by is_first_parameter?, or a "." or "#" punctuation.
    def is_parameter?(tkns, index)
      return true if (is_first_parameter?(tkns, index))

      t = tkns[index]
      return false if !t

      if (t.type == :punct)
        if (t.text == '.' || t.text == '#')
          return true
        end
      end

      return false
    end

    # Walks tkns[i...max] and re-types identifiers in place:
    # - a name bound by let / let rec with parameters becomes :fundef
    # - a val / external name whose type contains "->" becomes :fundecl
    # - a method name becomes :fundecl when followed by ":" and :fundef
    #   otherwise
    # - an identifier followed by something that looks like an argument
    #   becomes :funcall
    # The walk stops silently when a lookup runs off the end of the list.
    def convert_fun(tkns, i, max)
      while (i < max)
        t = tkns[i]
        if (t.type == :ident)
          if (check_token_prev(tkns, i, 1, :keyword, 'fun'))
            # Parameters of an anonymous function: skip to the arrow.
            while (!check_token(tkns, i, :punct, '->'))
              i = go_next(tkns, i, 1)
            end
          elsif (check_token_prev(tkns, i, 1, :keyword, 'let') ||
                 (check_token_prev(tkns, i, 1, :keyword, 'rec') &&
                  check_token_prev(tkns, i, 2, :keyword, 'let')))
            # "let x =" is a plain binding; anything else takes parameters.
            if (!check_token_next(tkns, i, 1, :punct, '='))
              t.type = :fundef
              i += 1 while (!check_token(tkns, i, :punct, '='))
            end
          elsif (check_token_prev(tkns, i, 1, :keyword, 'val') ||
                 (check_token_prev(tkns, i, 1, :keyword, 'mutable') &&
                  check_token_prev(tkns, i, 2, :keyword, 'val')) ||
                 check_token_prev(tkns, i, 1, :keyword, 'external'))
            if (check_token_next(tkns, i, 1, :punct, ':'))
              # not strict
              i = go_next(tkns, i, 2)
              while (!check_token(tkns, i, :keyword))
                if (check_token(tkns, i, :punct, '->'))
                  t.type = :fundecl
                  i = skip_type(tkns, i+1)
                  break
                end
                i = go_next(tkns, i, 1)
              end
            else
              # what?
            end
          elsif (is_method?(tkns, i))
            if (check_token_next(tkns, i, 1, :punct, ':'))
              t.type = :fundecl
              i = go_next(tkns, i, 2)
              i = skip_type(tkns, i)
            else
              t.type = :fundef
              i += 1 while (!check_token(tkns, i, :punct, '='))
            end
          elsif (!check_token_prev(tkns, i, 1, :punct, ':') &&
                 !check_token_prev(tkns, i, 1, :punct, '*') &&
                 !check_token_prev(tkns, i, 1, :punct, '\'') &&
                 !check_token_prev(tkns, i, 1, :punct, '~') &&
                 !check_token_prev(tkns, i, 1, :punct, '?') &&
                 !check_token_prev(tkns, i, 1, :punct, '|') &&
                 !check_token_prev(tkns, i, 1, :keyword, 'with'))
            # is it call?
            i = go_next(tkns, i, 1)
            if (check_token_prev(tkns, i, 2, :punct, '#') ||
                is_first_parameter?(tkns, i))
              t.type = :funcall
              i = skip_parameter(tkns, i)
              while (is_parameter?(tkns, i))
                i = skip_parameter(tkns, i)
              end
            end
          end
        elsif (check_token(tkns, i, :keyword, 'of') ||
               # check_token(tkns, i, :punct, '|') || # overrun ->
               check_token(tkns, i, :punct, ':>'))
          # is it needed?
          i = skip_type(tkns, i+1)
          i = go_prev(tkns, i, 1)
        elsif (check_token(tkns, i, :keyword, 'class'))
          i = go_next(tkns, i, 1) while (!check_token(tkns, i, :punct, '='))
        end

        i += 1
      end
    rescue Eof
      # Ran off the end of the token list; nothing more to convert.
    end

    # LangScan::OCaml.scan iterates over Objective Caml program.
    # It yields for each Fragment.
    #
    # All tokens are read from the tokenizer before any is yielded. The
    # tokenizer is disposed of even when the caller's block raises, so the
    # camlexer process and its writer thread are not left behind.
    def scan(input, &block)
      tokenizer = Tokenizer.new(input)

      tkns = []
      while (tkn = tokenizer.get_token())
        # is it ok?
        if (tkn.type == :ident && tkn.text =~ /^\W/)
          tkn.type = :punct
        end
        tkns << tkn
      end

      begin
        convert_fun(tkns, 0, tkns.size)

        tkns.each do |tkn|
          yield tkn
        end
      ensure
        tokenizer.dispose()
      end
    end

    # The language is only registered when the camlexer helper was found.
    if CAMLEXER_PATH
      LangScan.register(self)
    end
  end
end