langscan 1.2-x86-mswin32-60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. data/AUTHORS.txt +19 -0
  2. data/History.txt +126 -0
  3. data/Manifest.txt +167 -0
  4. data/README.rdoc +91 -0
  5. data/Rakefile +40 -0
  6. data/ext/langscan/_make_c.rb +20 -0
  7. data/ext/langscan/_make_h.rb +30 -0
  8. data/ext/langscan/_template.c +134 -0
  9. data/ext/langscan/_template.h +53 -0
  10. data/ext/langscan/c/c/Makefile +188 -0
  11. data/ext/langscan/c/c/c.c +134 -0
  12. data/ext/langscan/c/c/c.h +66 -0
  13. data/ext/langscan/c/c/ctok.c +4629 -0
  14. data/ext/langscan/c/c/ctok.l +212 -0
  15. data/ext/langscan/c/c/extconf.rb +3 -0
  16. data/ext/langscan/c/c/modulename.txt +1 -0
  17. data/ext/langscan/c/c/tokenlist.txt +13 -0
  18. data/ext/langscan/csharp/csharp/Makefile +188 -0
  19. data/ext/langscan/csharp/csharp/csharp.c +134 -0
  20. data/ext/langscan/csharp/csharp/csharp.h +65 -0
  21. data/ext/langscan/csharp/csharp/csharptok.c +2971 -0
  22. data/ext/langscan/csharp/csharp/csharptok.l +200 -0
  23. data/ext/langscan/csharp/csharp/extconf.rb +3 -0
  24. data/ext/langscan/csharp/csharp/modulename.txt +1 -0
  25. data/ext/langscan/csharp/csharp/tokenlist.txt +12 -0
  26. data/ext/langscan/d/d/Makefile +188 -0
  27. data/ext/langscan/d/d/d.c +134 -0
  28. data/ext/langscan/d/d/d.h +64 -0
  29. data/ext/langscan/d/d/dtok.c +5468 -0
  30. data/ext/langscan/d/d/dtok.l +282 -0
  31. data/ext/langscan/d/d/extconf.rb +3 -0
  32. data/ext/langscan/d/d/modulename.txt +1 -0
  33. data/ext/langscan/d/d/tokenlist.txt +11 -0
  34. data/ext/langscan/elisp/elisp/Makefile +188 -0
  35. data/ext/langscan/elisp/elisp/elisp.c +134 -0
  36. data/ext/langscan/elisp/elisp/elisp.h +62 -0
  37. data/ext/langscan/elisp/elisp/elisptok.c +2108 -0
  38. data/ext/langscan/elisp/elisp/elisptok.l +151 -0
  39. data/ext/langscan/elisp/elisp/extconf.rb +3 -0
  40. data/ext/langscan/elisp/elisp/modulename.txt +1 -0
  41. data/ext/langscan/elisp/elisp/tokenlist.txt +9 -0
  42. data/ext/langscan/java/java/Makefile +188 -0
  43. data/ext/langscan/java/java/extconf.rb +3 -0
  44. data/ext/langscan/java/java/java.c +134 -0
  45. data/ext/langscan/java/java/java.h +64 -0
  46. data/ext/langscan/java/java/javatok.c +2097 -0
  47. data/ext/langscan/java/java/javatok.l +155 -0
  48. data/ext/langscan/java/java/modulename.txt +1 -0
  49. data/ext/langscan/java/java/tokenlist.txt +11 -0
  50. data/ext/langscan/javascript/javascript/Makefile +188 -0
  51. data/ext/langscan/javascript/javascript/extconf.rb +3 -0
  52. data/ext/langscan/javascript/javascript/javascript.c +134 -0
  53. data/ext/langscan/javascript/javascript/javascript.h +63 -0
  54. data/ext/langscan/javascript/javascript/javascripttok.c +2058 -0
  55. data/ext/langscan/javascript/javascript/javascripttok.l +147 -0
  56. data/ext/langscan/javascript/javascript/modulename.txt +1 -0
  57. data/ext/langscan/javascript/javascript/tokenlist.txt +10 -0
  58. data/ext/langscan/pairmatcher/pairmatcher/Makefile +188 -0
  59. data/ext/langscan/pairmatcher/pairmatcher/extconf.rb +3 -0
  60. data/ext/langscan/pairmatcher/pairmatcher/pairmatcher.c +890 -0
  61. data/ext/langscan/php/php/Makefile +188 -0
  62. data/ext/langscan/php/php/extconf.rb +3 -0
  63. data/ext/langscan/php/php/modulename.txt +1 -0
  64. data/ext/langscan/php/php/php.c +134 -0
  65. data/ext/langscan/php/php/php.h +64 -0
  66. data/ext/langscan/php/php/phptok.c +2413 -0
  67. data/ext/langscan/php/php/phptok.l +212 -0
  68. data/ext/langscan/php/php/tokenlist.txt +11 -0
  69. data/ext/langscan/post-distclean.rb +21 -0
  70. data/ext/langscan/pre-config.rb +57 -0
  71. data/ext/langscan/python/python/Makefile +188 -0
  72. data/ext/langscan/python/python/extconf.rb +3 -0
  73. data/ext/langscan/python/python/modulename.txt +1 -0
  74. data/ext/langscan/python/python/python.c +134 -0
  75. data/ext/langscan/python/python/python.h +61 -0
  76. data/ext/langscan/python/python/pythontok.c +2109 -0
  77. data/ext/langscan/python/python/pythontok.l +155 -0
  78. data/ext/langscan/python/python/tokenlist.txt +8 -0
  79. data/ext/langscan/ruby/compat/ripper/Makefile +189 -0
  80. data/ext/langscan/ruby/compat/ripper/depend +1 -0
  81. data/ext/langscan/ruby/compat/ripper/extconf.rb +4 -0
  82. data/ext/langscan/ruby/compat/ripper/include/eventids1.c +251 -0
  83. data/ext/langscan/ruby/compat/ripper/include/eventids2.c +277 -0
  84. data/ext/langscan/ruby/compat/ripper/include/lex.c +138 -0
  85. data/ext/langscan/ruby/compat/ripper/ripper.c +14420 -0
  86. data/ext/langscan/scheme/scheme/Makefile +188 -0
  87. data/ext/langscan/scheme/scheme/extconf.rb +3 -0
  88. data/ext/langscan/scheme/scheme/modulename.txt +1 -0
  89. data/ext/langscan/scheme/scheme/scheme.c +134 -0
  90. data/ext/langscan/scheme/scheme/scheme.h +60 -0
  91. data/ext/langscan/scheme/scheme/schemetok.c +2454 -0
  92. data/ext/langscan/scheme/scheme/schemetok.l +177 -0
  93. data/ext/langscan/scheme/scheme/tokenlist.txt +7 -0
  94. data/ext/langscan/sh/sh/Makefile +188 -0
  95. data/ext/langscan/sh/sh/extconf.rb +3 -0
  96. data/ext/langscan/sh/sh/modulename.txt +1 -0
  97. data/ext/langscan/sh/sh/sh.c +134 -0
  98. data/ext/langscan/sh/sh/sh.h +61 -0
  99. data/ext/langscan/sh/sh/shtok.c +2477 -0
  100. data/ext/langscan/sh/sh/shtok.l +325 -0
  101. data/ext/langscan/sh/sh/tokenlist.txt +8 -0
  102. data/lib/langscan.rb +124 -0
  103. data/lib/langscan/_common.rb +50 -0
  104. data/lib/langscan/_easyscanner.rb +78 -0
  105. data/lib/langscan/_pairmatcher.rb +46 -0
  106. data/lib/langscan/_type.rb +125 -0
  107. data/lib/langscan/autoconf.rb +51 -0
  108. data/lib/langscan/automake.rb +51 -0
  109. data/lib/langscan/brainfuck.rb +48 -0
  110. data/lib/langscan/c.rb +144 -0
  111. data/lib/langscan/c/c.so +0 -0
  112. data/lib/langscan/csharp.rb +101 -0
  113. data/lib/langscan/csharp/csharp.so +0 -0
  114. data/lib/langscan/css.rb +109 -0
  115. data/lib/langscan/d.rb +201 -0
  116. data/lib/langscan/d/d.so +0 -0
  117. data/lib/langscan/eiffel.rb +167 -0
  118. data/lib/langscan/elisp.rb +132 -0
  119. data/lib/langscan/elisp/elisp.so +0 -0
  120. data/lib/langscan/io.rb +84 -0
  121. data/lib/langscan/java.rb +95 -0
  122. data/lib/langscan/java/java.so +0 -0
  123. data/lib/langscan/javascript.rb +97 -0
  124. data/lib/langscan/javascript/javascript.so +0 -0
  125. data/lib/langscan/lua.rb +116 -0
  126. data/lib/langscan/ocaml.rb +298 -0
  127. data/lib/langscan/ocaml/camlexer.ml +28 -0
  128. data/lib/langscan/ocaml/lexer.mll +230 -0
  129. data/lib/langscan/ocaml/types.ml +36 -0
  130. data/lib/langscan/pairmatcher/pairmatcher.so +0 -0
  131. data/lib/langscan/perl.rb +87 -0
  132. data/lib/langscan/perl/tokenizer.pl +231 -0
  133. data/lib/langscan/php.rb +80 -0
  134. data/lib/langscan/php/php.so +0 -0
  135. data/lib/langscan/python.rb +101 -0
  136. data/lib/langscan/python/python.so +0 -0
  137. data/lib/langscan/rpmspec.rb +71 -0
  138. data/lib/langscan/ruby.rb +164 -0
  139. data/lib/langscan/ruby/compat/README +5 -0
  140. data/lib/langscan/ruby/compat/ripper.rb +4 -0
  141. data/lib/langscan/ruby/compat/ripper.so +0 -0
  142. data/lib/langscan/ruby/compat/ripper/core.rb +918 -0
  143. data/lib/langscan/ruby/compat/ripper/filter.rb +70 -0
  144. data/lib/langscan/ruby/compat/ripper/lexer.rb +179 -0
  145. data/lib/langscan/ruby/compat/ripper/sexp.rb +100 -0
  146. data/lib/langscan/scheme.rb +160 -0
  147. data/lib/langscan/scheme/scheme.so +0 -0
  148. data/lib/langscan/sh.rb +116 -0
  149. data/lib/langscan/sh/sh.so +0 -0
  150. data/lib/langscan/text.rb +37 -0
  151. data/metaconfig +2 -0
  152. data/script/console +10 -0
  153. data/script/destroy +14 -0
  154. data/script/generate +14 -0
  155. data/script/makemanifest.rb +21 -0
  156. data/setup.rb +1604 -0
  157. data/tasks/extconf.rake +13 -0
  158. data/tasks/extconf/langscan.rake +42 -0
  159. data/test/langscan/brainfuck/test/test_scan.rb +55 -0
  160. data/test/langscan/c/test/test_scan.rb +216 -0
  161. data/test/langscan/c/test/test_token.rb +41 -0
  162. data/test/langscan/csharp/test/test_scan.rb +157 -0
  163. data/test/langscan/css/test/test_css.rb +79 -0
  164. data/test/langscan/d/test/test_scan.rb +233 -0
  165. data/test/langscan/d/test/test_token.rb +205 -0
  166. data/test/langscan/eiffel/test/test_eiffel.rb +95 -0
  167. data/test/langscan/elisp/test/test_elisp.rb +177 -0
  168. data/test/langscan/io/test/test_io.rb +79 -0
  169. data/test/langscan/java/test/test_java.rb +74 -0
  170. data/test/langscan/javascript/test/test_javascript.rb +39 -0
  171. data/test/langscan/lua/test/test_lua.rb +69 -0
  172. data/test/langscan/ocaml/test/test_ocaml.rb +161 -0
  173. data/test/langscan/php/test/test_scan.rb +138 -0
  174. data/test/langscan/python/test/test_scan.rb +105 -0
  175. data/test/langscan/rpmspec/test/test_rpmspec.rb +51 -0
  176. data/test/langscan/ruby/test/test_scan.rb +71 -0
  177. data/test/langscan/scheme/test/test_scan.rb +198 -0
  178. data/test/test_helper.rb +7 -0
  179. data/test/test_langscan.rb +123 -0
  180. metadata +320 -0
@@ -0,0 +1,147 @@
1
+ /*
2
+ * javascript.l - a lex rule for JavaScript
3
+ *
4
+ * Copyright (C) 2005 Keisuke Nishida <knishida@open-cobol.org>
5
+ * All rights reserved.
6
+ * This is free software with ABSOLUTELY NO WARRANTY.
7
+ *
8
+ * You can redistribute it and/or modify it under the terms of
9
+ * the GNU General Public License version 2.
10
+ */
11
+
12
+ %option reentrant
13
+ %option prefix="langscan_javascript_lex_"
14
+ %option noyywrap
15
+ %option nodefault
16
+
17
+ slash \/
18
+ star \*
19
+ nonstar [^\*]
20
+ nonslashstar [^\/\*]
21
+ commentcontent {star}+{nonslashstar}{nonstar}*
22
+ comment {slash}{star}{nonstar}*{commentcontent}*{star}+{slash}
23
+
24
+ %{
25
+
26
+ #include "javascript.h"
27
+
28
+ #define YY_EXTRA_TYPE langscan_javascript_lex_extra_t *
29
+
30
+ #if YY_NULL != 0
31
+ #error "YY_NULL is not 0."
32
+ #endif
33
+
34
+ #define YY_DECL langscan_javascript_token_t langscan_javascript_lex_lex(yyscan_t yyscanner)
35
+
36
/*
 * Feed flex from the user-supplied reader.
 *
 * The first time the reader returns 0 the eof flag is latched and the
 * reader is never called again.  When the flag is already set, `result`
 * is explicitly set to 0 (YY_NULL, checked to be 0 below) so that flex
 * never sees a stale byte count left over from an earlier call.
 */
#define YY_INPUT(buf,result,max_size) \
  if (yyextra->eof) { \
    (result) = 0; \
  } else { \
    (result) = yyextra->user_read(&(yyextra->user_data), (buf), (max_size)); \
    if ((result) == 0) \
      yyextra->eof = 1; \
  }
42
+
43
+ #define UPD update_pos(yyextra, yytext, yyleng)
44
+ static void update_pos(langscan_javascript_lex_extra_t *, char *, int);
45
+
46
+ #define report(token) \
47
+ do { \
48
+ yyextra->text = yytext; \
49
+ yyextra->leng = yyleng; \
50
+ return langscan_javascript_##token; \
51
+ } while (0)
52
+
53
+ %}
54
+
55
+ %%
56
+ [ \t\f\r]+ { UPD; report(space); }
57
+ \n { UPD; report(space); }
58
+ "//".* { UPD; report(comment); }
59
+ {comment} { UPD; report(comment); }
60
+ \"([^\\\"]|\\.)*\" { UPD; report(string); }
61
+ [A-Za-z_][0-9A-Za-z_]* { UPD; report(ident); }
62
+ . { UPD; report(punct); }
63
+
64
+ %%
65
+
66
+ static void update_pos(
67
+ langscan_javascript_lex_extra_t *extra,
68
+ char *text,
69
+ int leng)
70
+ {
71
+ int i, j;
72
+ extra->beg_byteno = extra->end_byteno;
73
+ extra->beg_lineno = extra->end_lineno;
74
+ extra->beg_columnno = extra->end_columnno;
75
+ j = 0;
76
+ for (i = 0; i < leng; i++) {
77
+ if (text[i] == '\n') {
78
+ extra->end_lineno++;
79
+ j = i + 1;
80
+ extra->end_columnno = 0;
81
+ }
82
+ }
83
+ extra->end_columnno += leng - j;
84
+ extra->end_byteno += leng;
85
+ }
86
+
87
+ langscan_javascript_tokenizer_t *langscan_javascript_make_tokenizer(
88
+ size_t (*user_read)(void **user_data_p, char *buf, size_t maxlen),
89
+ void *user_data)
90
+ {
91
+ langscan_javascript_tokenizer_t *tokenizer;
92
+ langscan_javascript_lex_extra_t *extra;
93
+ tokenizer = (langscan_javascript_tokenizer_t *)malloc(sizeof(langscan_javascript_tokenizer_t));
94
+ if (tokenizer == NULL)
95
+ return NULL;
96
+ extra = (langscan_javascript_lex_extra_t *)malloc(sizeof(langscan_javascript_lex_extra_t));
97
+ if (extra == NULL)
98
+ return NULL;
99
+ extra->user_read = user_read;
100
+ extra->user_data = user_data;
101
+ extra->beg_lineno = 1;
102
+ extra->beg_columnno = 0;
103
+ extra->beg_byteno = 0;
104
+ extra->end_lineno = 1;
105
+ extra->end_columnno = 0;
106
+ extra->end_byteno = 0;
107
+ extra->eof = 0;
108
+ tokenizer->extra = extra;
109
+ langscan_javascript_lex_lex_init(&tokenizer->scanner);
110
+ langscan_javascript_lex_set_extra(extra, tokenizer->scanner);
111
+ return tokenizer;
112
+ }
113
+
114
+ langscan_javascript_token_t langscan_javascript_get_token(langscan_javascript_tokenizer_t *tokenizer)
115
+ {
116
+ return langscan_javascript_lex_lex(tokenizer->scanner);
117
+ }
118
+
119
+ void langscan_javascript_free_tokenizer(langscan_javascript_tokenizer_t *tokenizer)
120
+ {
121
+ langscan_javascript_lex_extra_t *extra = langscan_javascript_lex_get_extra(tokenizer->scanner);
122
+ free((void *)extra);
123
+ langscan_javascript_lex_lex_destroy(tokenizer->scanner);
124
+ free((void *)tokenizer);
125
+ }
126
+
127
+ user_read_t langscan_javascript_tokenizer_get_user_read(langscan_javascript_tokenizer_t *tokenizer)
128
+ {
129
+ return tokenizer->extra->user_read;
130
+ }
131
+
132
+ void *langscan_javascript_tokenizer_get_user_data(langscan_javascript_tokenizer_t *tokenizer)
133
+ {
134
+ return tokenizer->extra->user_data;
135
+ }
136
+
137
+ const char *langscan_javascript_token_name(langscan_javascript_token_t token)
138
+ {
139
+ static char *token_names[] = {
140
+ "*eof*",
141
+ #define LANGSCAN_JAVASCRIPT_TOKEN(name) #name,
142
+ LANGSCAN_JAVASCRIPT_TOKEN_LIST
143
+ #undef LANGSCAN_JAVASCRIPT_TOKEN
144
+ };
145
+
146
+ return token_names[token];
147
+ }
@@ -0,0 +1,10 @@
1
+ preproc_beg
2
+ preproc_end
3
+ character
4
+ integer
5
+ floating
6
+ string
7
+ ident
8
+ punct
9
+ comment
10
+ space
@@ -0,0 +1,188 @@
1
+
2
+ SHELL = /bin/sh
3
+
4
+ #### Start of system configuration section. ####
5
+
6
+ srcdir = .
7
+ topdir = C:/usr/ruby/lib/ruby/1.8/i386-mswin32
8
+ hdrdir = $(topdir)
9
+ VPATH = $(srcdir);$(topdir);$(hdrdir)
10
+
11
+ DESTDIR = C:
12
+ exec_prefix = $(prefix)
13
+ prefix = $(DESTDIR)/usr/ruby
14
+ sharedstatedir = $(DESTDIR)/etc
15
+ mandir = $(prefix)/man
16
+ oldincludedir = $(DESTDIR)/usr/include
17
+ bindir = $(exec_prefix)/bin
18
+ libexecdir = $(exec_prefix)/libexec
19
+ sitedir = $(prefix)/lib/ruby/site_ruby
20
+ vendorarchdir = $(vendorlibdir)/$(sitearch)
21
+ includedir = $(prefix)/include
22
+ infodir = $(prefix)/info
23
+ vendorlibdir = $(vendordir)/$(ruby_version)
24
+ sysconfdir = $(prefix)/etc
25
+ libdir = $(exec_prefix)/lib
26
+ sbindir = $(exec_prefix)/sbin
27
+ rubylibdir = $(libdir)/ruby/$(ruby_version)
28
+ vendordir = $(prefix)/lib/ruby/vendor_ruby
29
+ archdir = $(rubylibdir)/$(arch)
30
+ sitearchdir = $(sitelibdir)/$(sitearch)
31
+ datadir = $(prefix)/share
32
+ localstatedir = $(DESTDIR)/var
33
+ sitelibdir = $(sitedir)/$(ruby_version)
34
+
35
+ CC = cl -nologo
36
+ LIBRUBY = $(RUBY_SO_NAME).lib
37
+ LIBRUBY_A = $(RUBY_SO_NAME)-static.lib
38
+ LIBRUBYARG_SHARED = $(LIBRUBY)
39
+ LIBRUBYARG_STATIC = $(LIBRUBY_A)
40
+
41
+ RUBY_EXTCONF_H =
42
+ CFLAGS = -MT -Zi -O2b2xg- -G6
43
+ INCFLAGS = -I. -I$(topdir) -I$(hdrdir) -I$(srcdir)
44
+ DEFS =
45
+ CPPFLAGS =
46
+ CXXFLAGS = $(CFLAGS)
47
+ ldflags =
48
+ dldflags = -link -incremental:no -debug -opt:ref -opt:icf -dll $(LIBPATH)
49
+ archflag =
50
+ DLDFLAGS = $(ldflags) $(dldflags) $(archflag)
51
+ LDSHARED = cl -nologo -LD
52
+ AR = lib -nologo
53
+ EXEEXT = .exe
54
+
55
+ RUBY_INSTALL_NAME = ruby
56
+ RUBY_SO_NAME = msvcrt-ruby18
57
+ arch = i386-mswin32
58
+ sitearch = i386-msvcrt
59
+ ruby_version = 1.8
60
+ ruby = C:/usr/ruby/bin/ruby
61
+ RUBY = $(ruby:/=\)
62
+ RM = $(RUBY) -run -e rm -- -f
63
+ MAKEDIRS = @$(RUBY) -run -e mkdir -- -p
64
+ INSTALL = @$(RUBY) -run -e install -- -vp
65
+ INSTALL_PROG = $(INSTALL) -m 0755
66
+ INSTALL_DATA = $(INSTALL) -m 0644
67
+ COPY = copy > nul
68
+
69
+ #### End of system configuration section. ####
70
+
71
+ preload =
72
+
73
+ libpath = . $(libdir)
74
+ LIBPATH = -libpath:"." -libpath:"$(libdir)"
75
+ DEFFILE = $(TARGET)-$(arch).def
76
+
77
+ CLEANFILES = mkmf.log
78
+ DISTCLEANFILES = vc*.pdb $(DEFFILE)
79
+
80
+ extout =
81
+ extout_prefix =
82
+ target_prefix = /langscan/pairmatcher
83
+ LOCAL_LIBS =
84
+ LIBS = $(LIBRUBYARG_SHARED) oldnames.lib user32.lib advapi32.lib shell32.lib ws2_32.lib
85
+ SRCS = pairmatcher.c
86
+ OBJS = pairmatcher.obj
87
+ TARGET = pairmatcher
88
+ DLLIB = $(TARGET).so
89
+ EXTSTATIC =
90
+ STATIC_LIB =
91
+
92
+ BINDIR = $(bindir)
93
+ RUBYCOMMONDIR = $(sitedir)$(target_prefix)
94
+ RUBYLIBDIR = C:/devel/gem/git/langscan/pkg/langscan-1.2.gem.build/lib$(target_prefix)
95
+ RUBYARCHDIR = C:/devel/gem/git/langscan/pkg/langscan-1.2.gem.build/lib$(target_prefix)
96
+
97
+ TARGET_SO = $(DLLIB)
98
+ CLEANLIBS = $(TARGET).so $(TARGET).il? $(TARGET).tds $(TARGET).map
99
+ CLEANOBJS = *.obj *.lib *.s[ol] *.pdb *.exp *.bak
100
+
101
+ all: $(DLLIB)
102
+ static: $(STATIC_LIB)
103
+
104
+ clean:
105
+ @-$(RM) $(CLEANLIBS:/=\) $(CLEANOBJS:/=\) $(CLEANFILES:/=\)
106
+
107
+ distclean: clean
108
+ @-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
109
+ @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES:/=\)
110
+
111
+ realclean: distclean
112
+ install: install-so install-rb
113
+
114
+ install-so: $(RUBYARCHDIR)
115
+ install-so: $(RUBYARCHDIR)/$(DLLIB)
116
+ $(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
117
+ $(INSTALL_PROG) $(DLLIB:/=\) $(RUBYARCHDIR:/=\)
118
+ install-rb: pre-install-rb install-rb-default
119
+ install-rb-default: pre-install-rb-default
120
+ pre-install-rb: Makefile
121
+ pre-install-rb-default: Makefile
122
+ $(RUBYARCHDIR):
123
+ $(MAKEDIRS) $@
124
+
125
+ site-install: site-install-so site-install-rb
126
+ site-install-so: install-so
127
+ site-install-rb: install-rb
128
+
129
+ .SUFFIXES: .c .m .cc .cxx .cpp .obj
130
+
131
+ {$(hdrdir)}.cc{}.obj:
132
+ $(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
133
+
134
+ {$(topdir)}.cc{}.obj:
135
+ $(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
136
+
137
+ {$(srcdir)}.cc{}.obj:
138
+ $(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
139
+
140
+ .cc.obj:
141
+ $(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
142
+
143
+ {$(hdrdir)}.cxx{}.obj:
144
+ $(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
145
+
146
+ {$(topdir)}.cxx{}.obj:
147
+ $(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
148
+
149
+ {$(srcdir)}.cxx{}.obj:
150
+ $(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
151
+
152
+ .cxx.obj:
153
+ $(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
154
+
155
+ {$(hdrdir)}.cpp{}.obj:
156
+ $(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
157
+
158
+ {$(topdir)}.cpp{}.obj:
159
+ $(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
160
+
161
+ {$(srcdir)}.cpp{}.obj:
162
+ $(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
163
+
164
+ .cpp.obj:
165
+ $(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
166
+
167
+ {$(hdrdir)}.c{}.obj:
168
+ $(CC) $(INCFLAGS) $(CFLAGS) $(CPPFLAGS) -c -Tc$(<:\=/)
169
+
170
+ {$(topdir)}.c{}.obj:
171
+ $(CC) $(INCFLAGS) $(CFLAGS) $(CPPFLAGS) -c -Tc$(<:\=/)
172
+
173
+ {$(srcdir)}.c{}.obj:
174
+ $(CC) $(INCFLAGS) $(CFLAGS) $(CPPFLAGS) -c -Tc$(<:\=/)
175
+
176
+ .c.obj:
177
+ $(CC) $(INCFLAGS) $(CFLAGS) $(CPPFLAGS) -c -Tc$(<:\=/)
178
+
179
+ $(DLLIB): $(DEFFILE) $(OBJS) Makefile
180
+ @-$(RM) $@
181
+ $(LDSHARED) -Fe$(@) $(OBJS) $(LIBS) $(LOCAL_LIBS) $(DLDFLAGS) -implib:$(*F:.so=)-$(arch).lib -pdb:$(*F:.so=)-$(arch).pdb -def:$(DEFFILE)
182
+
183
+
184
+
185
+ $(DEFFILE):
186
+ $(RUBY) -e "puts 'EXPORTS', 'Init_$(TARGET)'" > $@
187
+
188
+ $(OBJS): {.;$(VPATH)}ruby.h {.;$(VPATH)}defines.h
@@ -0,0 +1,3 @@
1
# Build configuration for the pairmatcher C extension: mkmf writes a
# Makefile for the target named below.
require 'mkmf'

extension_target = 'langscan/pairmatcher/pairmatcher'
create_makefile(extension_target)
@@ -0,0 +1,890 @@
1
+ /*
2
+ * pairmatcher.c - a pair matching parser
3
+ *
4
+ * Copyright (C) 2005 Akira Tanaka <akr@m17n.org>
5
+ * All rights reserved.
6
+ * This is free software with ABSOLUTELY NO WARRANTY.
7
+ *
8
+ * You can redistribute it and/or modify it under the terms of
9
+ * the GNU General Public License version 2.
10
+ */
11
+
12
+ #include <ruby.h>
13
+
14
+ static ID id_get_token, id_new, id_call;
15
+ static VALUE Fragment;
16
+
17
+ #ifndef RSTRUCT_PTR
18
+ # define RSTRUCT_PTR(st) (RSTRUCT(st)->ptr)
19
+ #endif
20
+ #ifndef RSTRUCT_LEN
21
+ # define RSTRUCT_LEN(st) (RSTRUCT(st)->len)
22
+ #endif
23
+
24
+ #ifndef RARRAY_PTR
25
+ # define RARRAY_PTR(str) (RARRAY(str)->ptr)
26
+ #endif
27
+ #ifndef RARRAY_LEN
28
+ # define RARRAY_LEN(str) (RARRAY(str)->len)
29
+ #endif
30
+
31
+ #ifdef SYMBOL_P
32
+ # define Check_Symbol(val) do { if (!SYMBOL_P(val)) { Check_Type(val, T_SYMBOL); } } while (0)
33
+ #else
34
+ # define Check_Symbol(val) Check_Type(val, T_SYMBOL)
35
+ #endif
36
+
37
+ static VALUE
38
+ fragment_type(VALUE fragment)
39
+ {
40
+ VALUE val;
41
+ Check_Type(fragment, T_STRUCT);
42
+ if (RBASIC(fragment)->klass != Fragment) {
43
+ rb_raise(rb_eTypeError, "not fragment");
44
+ }
45
+ val = RSTRUCT_PTR(fragment)[0];
46
+ Check_Symbol(val);
47
+ return val;
48
+ }
49
+
50
+ static VALUE
51
+ fragment_text(VALUE fragment)
52
+ {
53
+ VALUE val;
54
+ Check_Type(fragment, T_STRUCT);
55
+ if (RBASIC(fragment)->klass != Fragment) {
56
+ rb_raise(rb_eTypeError, "not fragment");
57
+ }
58
+ val = RSTRUCT_PTR(fragment)[1];
59
+ StringValue(val);
60
+ return val;
61
+ }
62
+
63
+ static int
64
+ fragment_byteno(VALUE fragment)
65
+ {
66
+ VALUE val;
67
+ Check_Type(fragment, T_STRUCT);
68
+ if (RBASIC(fragment)->klass != Fragment) {
69
+ rb_raise(rb_eTypeError, "not fragment");
70
+ }
71
+ val = RSTRUCT_PTR(fragment)[3];
72
+ return NUM2INT(val);
73
+ }
74
+
75
+ typedef struct {
76
+ unsigned char before_open_max;
77
+ unsigned char after_open_max;
78
+ unsigned char before_close_max;
79
+ unsigned char after_close_max;
80
+ VALUE pair_defs;
81
+ VALUE intertoken_defs;
82
+ VALUE recent_tokens;
83
+ VALUE pair_stack;
84
+ VALUE closed_pairs;
85
+ } pairmatcher_t;
86
+
87
+ static void pairmatcher_mark(pairmatcher_t *pairmatcher)
88
+ {
89
+ if (pairmatcher == NULL)
90
+ return;
91
+ rb_gc_mark(pairmatcher->pair_defs);
92
+ rb_gc_mark(pairmatcher->intertoken_defs);
93
+ rb_gc_mark(pairmatcher->recent_tokens);
94
+ rb_gc_mark(pairmatcher->pair_stack);
95
+ rb_gc_mark(pairmatcher->closed_pairs);
96
+ }
97
+
98
+ static void pairmatcher_free(pairmatcher_t *pairmatcher)
99
+ {
100
+ if (pairmatcher == NULL)
101
+ return;
102
+ free((void *)pairmatcher);
103
+ }
104
+
105
+ static VALUE pairmatcher_s_allocate(VALUE klass)
106
+ {
107
+ return Data_Wrap_Struct(klass, pairmatcher_mark, pairmatcher_free, NULL);
108
+ }
109
+
110
+ static VALUE pairmatcher_initialize(
111
+ VALUE self,
112
+ VALUE before_open_max,
113
+ VALUE after_open_max,
114
+ VALUE before_close_max,
115
+ VALUE after_close_max)
116
+ {
117
+ pairmatcher_t *pairmatcher;
118
+
119
+ Data_Get_Struct(self, pairmatcher_t, pairmatcher);
120
+ if (pairmatcher != NULL) { rb_raise(rb_eArgError, "called twice"); }
121
+
122
+ pairmatcher = ALLOC(pairmatcher_t);
123
+ pairmatcher->pair_defs = Qnil;
124
+ pairmatcher->intertoken_defs = Qnil;
125
+ pairmatcher->recent_tokens = Qnil;
126
+ pairmatcher->pair_stack = Qnil;
127
+ pairmatcher->closed_pairs = Qnil;
128
+ DATA_PTR(self) = pairmatcher;
129
+
130
+ pairmatcher->before_open_max = NUM2INT(before_open_max);
131
+ pairmatcher->after_open_max = NUM2INT(after_open_max);
132
+ pairmatcher->before_close_max = NUM2INT(before_close_max);
133
+ pairmatcher->after_close_max = NUM2INT(after_close_max);
134
+ pairmatcher->pair_defs = rb_ary_new();
135
+ //RBASIC(pairmatcher->pair_defs)->klass = 0;
136
+ pairmatcher->intertoken_defs = rb_ary_new();
137
+ //RBASIC(pairmatcher->intertoken_defs)->klass = 0;
138
+ pairmatcher->recent_tokens = rb_ary_new();
139
+ //RBASIC(pairmatcher->recent_tokens)->klass = 0;
140
+ pairmatcher->pair_stack = rb_ary_new();
141
+ //RBASIC(pairmatcher->pair_stack)->klass = 0;
142
+ pairmatcher->closed_pairs = rb_ary_new();
143
+ //RBASIC(pairmatcher->closed_pairs)->klass = 0;
144
+ return self;
145
+ }
146
+
147
+ #define GetPM(obj, var) \
148
+ do { \
149
+ Data_Get_Struct((obj), pairmatcher_t, (var)); \
150
+ if ((var) == NULL) { rb_raise(rb_eArgError, "not initialized"); } \
151
+ } while(0)
152
+
153
+ static VALUE
154
+ pairmatcher_get_before_open_max(VALUE self)
155
+ {
156
+ pairmatcher_t *pairmatcher;
157
+ GetPM(self, pairmatcher);
158
+ if (pairmatcher == NULL) { rb_raise(rb_eArgError, "not initialized"); }
159
+ return INT2NUM(pairmatcher->before_open_max);
160
+ }
161
+
162
+ static VALUE
163
+ pairmatcher_get_after_open_max(VALUE self)
164
+ {
165
+ pairmatcher_t *pairmatcher;
166
+ GetPM(self, pairmatcher);
167
+ return INT2NUM(pairmatcher->after_open_max);
168
+ }
169
+
170
+ static VALUE
171
+ pairmatcher_get_before_close_max(VALUE self)
172
+ {
173
+ pairmatcher_t *pairmatcher;
174
+ GetPM(self, pairmatcher);
175
+ return INT2NUM(pairmatcher->before_close_max);
176
+ }
177
+
178
+ static VALUE
179
+ pairmatcher_get_after_close_max(VALUE self)
180
+ {
181
+ pairmatcher_t *pairmatcher;
182
+ GetPM(self, pairmatcher);
183
+ return INT2NUM(pairmatcher->after_close_max);
184
+ }
185
+
186
+ static VALUE
187
+ pairmatcher_define_pair(VALUE self, VALUE pair_type, VALUE open_type, VALUE open_text, VALUE close_type, VALUE close_text)
188
+ {
189
+ pairmatcher_t *pairmatcher;
190
+ VALUE def;
191
+ Check_Symbol(open_type);
192
+ if (open_text != Qnil) {
193
+ StringValue(open_text);
194
+ open_text = rb_str_new4(open_text);
195
+ }
196
+ Check_Symbol(close_type);
197
+ if (close_text != Qnil) {
198
+ StringValue(close_text);
199
+ close_text = rb_str_new4(close_text);
200
+ }
201
+
202
+ def = rb_ary_new3(5, open_type, open_text, close_type, close_text, pair_type);
203
+ //RBASIC(def)->klass = 0;
204
+
205
+ GetPM(self, pairmatcher);
206
+ rb_ary_push(pairmatcher->pair_defs, def);
207
+
208
+ return Qnil;
209
+ }
210
+
211
+ static VALUE
212
+ pairmatcher_define_intertoken_fragment(VALUE self, VALUE type, VALUE text)
213
+ {
214
+ pairmatcher_t *pairmatcher;
215
+ VALUE def;
216
+ Check_Symbol(type);
217
+ if (text != Qnil) {
218
+ StringValue(text);
219
+ text = rb_str_new4(text);
220
+ }
221
+
222
+ def = rb_ary_new3(2, type, text);
223
+ //RBASIC(def)->klass = 0;
224
+
225
+ GetPM(self, pairmatcher);
226
+ rb_ary_push(pairmatcher->intertoken_defs, def);
227
+
228
+ return Qnil;
229
+ }
230
+
231
+ static VALUE
232
+ get_token(VALUE tokenizer)
233
+ {
234
+ return rb_funcall(tokenizer, id_get_token, 0);
235
+ }
236
+
237
+ static VALUE
238
+ open_token_p(pairmatcher_t *pairmatcher, VALUE token)
239
+ {
240
+ int i;
241
+ VALUE type = fragment_type(token);
242
+ VALUE text = fragment_text(token);
243
+ for (i = 0; i < RARRAY_LEN(pairmatcher->pair_defs); i++) {
244
+ VALUE def = RARRAY_PTR(pairmatcher->pair_defs)[i];
245
+ VALUE open_type = RARRAY_PTR(def)[0];
246
+ VALUE open_text = RARRAY_PTR(def)[1];
247
+ if (open_type == Qnil || open_type == type) {
248
+ if (open_text == Qnil || rb_str_cmp(open_text, text) == 0) {
249
+ return def;
250
+ }
251
+ }
252
+ }
253
+ return Qfalse;
254
+ }
255
+
256
+ static VALUE
257
+ close_token_p(pairmatcher_t *pairmatcher, VALUE token, int *i)
258
+ {
259
+ VALUE type = fragment_type(token);
260
+ VALUE text = fragment_text(token);
261
+ for (; *i < RARRAY_LEN(pairmatcher->pair_defs); (*i)++) {
262
+ VALUE def = RARRAY_PTR(pairmatcher->pair_defs)[*i];
263
+ VALUE close_type = RARRAY_PTR(def)[2];
264
+ VALUE close_text = RARRAY_PTR(def)[3];
265
+ if (close_type == Qnil || close_type == type) {
266
+ if (close_text == Qnil || rb_str_cmp(close_text, text) == 0) {
267
+ return def;
268
+ }
269
+ }
270
+ }
271
+ return Qfalse;
272
+ }
273
+
274
+ #define pair_get_pair_def(pair) (RSTRUCT_PTR(pair)[0])
275
+ #define pair_get_before_open_len(pair) FIX2INT(RSTRUCT_PTR(pair)[1])
276
+ #define pair_get_around_open_tokens(pair) (RSTRUCT_PTR(pair)[2])
277
+ #define pair_get_before_close_len(pair) FIX2INT(RSTRUCT_PTR(pair)[3])
278
+ #define pair_get_around_close_tokens(pair) (RSTRUCT_PTR(pair)[4])
279
+ #define pair_get_outer(pair) (RSTRUCT_PTR(pair)[5])
280
+ #define pair_set_pair_def(pair, val) (RSTRUCT_PTR(pair)[0] = (val))
281
+ #define pair_set_before_open_len(pair, len) (RSTRUCT_PTR(pair)[1] = INT2FIX(len))
282
+ #define pair_set_around_open_tokens(pair, val) (RSTRUCT_PTR(pair)[2] = (val))
283
+ #define pair_set_before_close_len(pair, len) (RSTRUCT_PTR(pair)[3] = INT2FIX(len))
284
+ #define pair_set_around_close_tokens(pair, val) (RSTRUCT_PTR(pair)[4] = (val))
285
+
286
+ #define pair_get_after_open_len(pair) (RARRAY_LEN(pair_get_around_open_tokens(pair))-pair_get_before_open_len(pair)-1)
287
+ #define pair_get_after_close_len(pair) (RARRAY_LEN(pair_get_around_close_tokens(pair))-pair_get_before_close_len(pair)-1)
288
+
289
+ static VALUE Pair;
290
+
291
+ static VALUE
292
+ make_pair(VALUE pair_def, int before_open_len, VALUE around_open_tokens, VALUE outer)
293
+ {
294
+ VALUE pair = rb_struct_new(Pair,
295
+ pair_def,
296
+ INT2FIX(before_open_len),
297
+ around_open_tokens,
298
+ Qnil,
299
+ Qnil,
300
+ outer);
301
+ return pair;
302
+ }
303
+
304
+ static int
305
+ concat_recent_tokens(pairmatcher_t *pm, int max, VALUE ary)
306
+ {
307
+ int i;
308
+ if (RARRAY_LEN(pm->recent_tokens) <= max)
309
+ max = RARRAY_LEN(pm->recent_tokens);
310
+ for (i = 0; i < max; i++) {
311
+ rb_ary_push(ary, RARRAY_PTR(pm->recent_tokens)[RARRAY_LEN(pm->recent_tokens)-max+i]);
312
+ }
313
+ return max;
314
+ }
315
+
316
+ static void
317
+ put_open_token(pairmatcher_t *pm, VALUE open_token, VALUE pair_def)
318
+ {
319
+ int before_open_len, stack_len;
320
+ VALUE pair;
321
+ VALUE around_open_tokens;
322
+ around_open_tokens = rb_ary_new2(pm->before_open_max+1+pm->after_open_max);
323
+ before_open_len = concat_recent_tokens(pm, pm->before_open_max, around_open_tokens);
324
+ rb_ary_push(around_open_tokens, open_token);
325
+ stack_len = RARRAY_LEN(pm->pair_stack);
326
+ pair = make_pair(pair_def, before_open_len, around_open_tokens,
327
+ stack_len ? RARRAY_PTR(pm->pair_stack)[stack_len-1] : Qnil);
328
+ rb_ary_push(pm->pair_stack, pair);
329
+ }
330
+
331
+ static int
332
+ matching_open_depth(pairmatcher_t *pm, VALUE open_token, VALUE pair_def)
333
+ {
334
+ int i;
335
+ for (i = RARRAY_LEN(pm->pair_stack) - 1; 0 <= i; i--) {
336
+ if (pair_get_pair_def(RARRAY_PTR(pm->pair_stack)[i]) == pair_def) {
337
+ return i;
338
+ }
339
+ }
340
+ return -1;
341
+ }
342
+
343
+ static void
344
+ report_token_list_now(pairmatcher_t *pm, VALUE reporter, VALUE token_list, int beg, int len)
345
+ {
346
+ while (len) {
347
+ rb_funcall(reporter, id_call, 1, RARRAY_PTR(token_list)[beg]);
348
+ beg++;
349
+ len--;
350
+ }
351
+ }
352
+
353
+ static void
354
+ report_token_list_open_pair(pairmatcher_t *pm, VALUE reporter, VALUE token_list, int beg, int len)
355
+ {
356
+ int i;
357
+ for (i = RARRAY_LEN(pm->pair_stack) - 1; 0 <= i; i--) {
358
+ VALUE pair = RARRAY_PTR(pm->pair_stack)[i];
359
+ VALUE around_open = pair_get_around_open_tokens(pair);
360
+ int first_byteno = fragment_byteno(RARRAY_PTR(around_open)[0]);
361
+ int last_byteno = fragment_byteno(RARRAY_PTR(around_open)[RARRAY_LEN(around_open)-1]);
362
+ if (last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg])) {
363
+ report_token_list_now(pm, reporter, token_list, beg, len);
364
+ return;
365
+ }
366
+ /* last_byteno >= fragment_byteno(RARRAY_PTR(token_list)[beg]) */
367
+ if (fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) < first_byteno)
368
+ continue;
369
+ /*
370
+ * fragment_byteno(RARRAY_PTR(token_list)[beg]) <= last_byteno
371
+ * first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1])
372
+ */
373
+ if (last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg+len-1])) {
374
+ int beg2 = beg+len-1;
375
+ int len2 = 1;
376
+ while (beg <= beg2 && last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg2-1])) {
377
+ beg2--;
378
+ len2++;
379
+ }
380
+ report_token_list_now(pm, reporter, token_list, beg2, len2);
381
+ len -= len2;
382
+ }
383
+ /*
384
+ * first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) <= last_byteno
385
+ * fragment_byteno(RARRAY_PTR(token_list)[beg]) <= last_byteno
386
+ */
387
+ while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1])) {
388
+ len--;
389
+ }
390
+ if (len == 0)
391
+ return;
392
+ }
393
+ report_token_list_now(pm, reporter, token_list, beg, len);
394
+ }
395
+
396
+ static void
397
+ report_token_list_rec_closed_pair(pairmatcher_t *pm, VALUE reporter, VALUE token_list, int beg, int len, int closed_pairs_index)
398
+ {
399
+ VALUE pair;
400
+ VALUE around_close, around_open, first, last;
401
+ int first_byteno, last_byteno;
402
+ if (RARRAY_LEN(pm->closed_pairs) <= closed_pairs_index) {
403
+ report_token_list_open_pair(pm, reporter, token_list, beg, len);
404
+ return;
405
+ }
406
+ pair = RARRAY_PTR(pm->closed_pairs)[closed_pairs_index];
407
+ if (pair == Qnil) {
408
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
409
+ return;
410
+ }
411
+ around_close = pair_get_around_close_tokens(pair);
412
+ first = RARRAY_PTR(around_close)[0];
413
+ first_byteno = fragment_byteno(first);
414
+ while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]))
415
+ len--;
416
+ if (len == 0)
417
+ return;
418
+ around_open = pair_get_around_open_tokens(pair);
419
+ first = RARRAY_PTR(around_open)[0];
420
+ first_byteno = fragment_byteno(first);
421
+ last = RARRAY_PTR(around_open)[RARRAY_LEN(around_open)-1];
422
+ last_byteno = fragment_byteno(last);
423
+ if (last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg]) ||
424
+ fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) < first_byteno) {
425
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
426
+ }
427
+ else if (first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg])) {
428
+ while (0 < len && fragment_byteno(RARRAY_PTR(token_list)[beg]) <= last_byteno) {
429
+ beg++;
430
+ len--;
431
+ }
432
+ if (len)
433
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
434
+ }
435
+ else if (fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) <= last_byteno) {
436
+ while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]))
437
+ len--;
438
+ if (len)
439
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
440
+ }
441
+ else {
442
+ int beg1, len1;
443
+ int beg2, len2;
444
+ beg1 = beg;
445
+ len1 = 1;
446
+ while (len1 < len && fragment_byteno(RARRAY_PTR(token_list)[beg1+len1]) < first_byteno)
447
+ len1++;
448
+ beg2 = beg + len - 1;
449
+ len2 = 1;
450
+ while (beg <= beg2-1 && last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg2-1])) {
451
+ beg2--;
452
+ len2++;
453
+ }
454
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg1, len1, closed_pairs_index+1);
455
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg2, len2, closed_pairs_index+1);
456
+ }
457
+ }
458
+
459
+ static void
460
+ report_token_list(pairmatcher_t *pm, VALUE token_list, int beg, int len, VALUE reporter)
461
+ {
462
+ if (len < 0)
463
+ len = RARRAY_LEN(token_list) - beg;
464
+ if (len == 0)
465
+ return;
466
+ if (RARRAY_LEN(pm->recent_tokens) != 0) {
467
+ int first_byteno = fragment_byteno(RARRAY_PTR(pm->recent_tokens)[0]);
468
+ if (first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[0]))
469
+ return;
470
+ while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]))
471
+ len--;
472
+ }
473
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, 0);
474
+ }
475
+
476
+ static void
477
+ discard_unmatched_pair(pairmatcher_t *pm, VALUE reporter)
478
+ {
479
+ VALUE pair = rb_ary_pop(pm->pair_stack);
480
+ report_token_list(pm, pair_get_around_open_tokens(pair), 0, -1, reporter);
481
+ }
482
+
483
+ static void
484
+ put_close_token(pairmatcher_t *pm, VALUE close_token, VALUE pair_def, int depth, VALUE reporter)
485
+ {
486
+ VALUE pair, around_close_tokens;
487
+ int before_close_len;
488
+ while (depth+1 < RARRAY_LEN(pm->pair_stack)) {
489
+ discard_unmatched_pair(pm, reporter);
490
+ }
491
+ pair = rb_ary_pop(pm->pair_stack);
492
+ around_close_tokens = rb_ary_new2(pm->before_close_max+1+pm->after_close_max);
493
+ before_close_len = concat_recent_tokens(pm, pm->before_close_max, around_close_tokens);
494
+ rb_ary_push(around_close_tokens, close_token);
495
+ pair_set_before_close_len(pair, before_close_len);
496
+ pair_set_around_close_tokens(pair, around_close_tokens);
497
+ rb_ary_push(pm->closed_pairs, pair);
498
+ }
499
+
500
+ static void
501
+ add_recent(pairmatcher_t *pm, VALUE reporter, VALUE token)
502
+ {
503
+ int max = pm->before_open_max;
504
+ if (max < pm->before_close_max)
505
+ max = pm->before_close_max;
506
+ if (max == 0)
507
+ return;
508
+ if (RARRAY_LEN(pm->recent_tokens) < max) {
509
+ rb_ary_push(pm->recent_tokens, token);
510
+ }
511
+ else {
512
+ VALUE val = RARRAY_PTR(pm->recent_tokens)[0];
513
+ MEMMOVE(RARRAY_PTR(pm->recent_tokens),
514
+ RARRAY_PTR(pm->recent_tokens)+1,
515
+ VALUE, max-1);
516
+ RARRAY_PTR(pm->recent_tokens)[max-1] = token;
517
+ report_token_list(pm, rb_ary_new3(1, val), 0, -1, reporter);
518
+ }
519
+ }
520
+
521
+ static void
522
+ add_after_open(pairmatcher_t *pm, VALUE token)
523
+ {
524
+ int i;
525
+ int max = pm->after_open_max;
526
+ VALUE pair;
527
+ for (i = RARRAY_LEN(pm->pair_stack)-1; 0 <= i; i--) {
528
+ pair = RARRAY_PTR(pm->pair_stack)[i];
529
+ if (max <= pair_get_after_open_len(pair))
530
+ break;
531
+ rb_ary_push(pair_get_around_open_tokens(pair), token);
532
+ }
533
+ for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
534
+ pair = RARRAY_PTR(pm->closed_pairs)[i];
535
+ if (pair == Qnil)
536
+ continue;
537
+ if (pair_get_after_open_len(pair) < max)
538
+ rb_ary_push(pair_get_around_open_tokens(pair), token);
539
+ }
540
+ }
541
+
542
+ static void
543
+ add_after_close(pairmatcher_t *pm, VALUE token)
544
+ {
545
+ int i;
546
+ int max = pm->after_close_max;
547
+ VALUE pair;
548
+ for (i = RARRAY_LEN(pm->closed_pairs) - 1; 0 <= i; i--) {
549
+ pair = RARRAY_PTR(pm->closed_pairs)[i];
550
+ if (pair == Qnil)
551
+ continue;
552
+ if (max <= pair_get_after_close_len(pair))
553
+ break;
554
+ rb_ary_push(pair_get_around_close_tokens(pair), token);
555
+ }
556
+ }
557
+
558
+ static void
559
+ discard_matched_pair(pairmatcher_t *pm, VALUE pair, VALUE reporter)
560
+ {
561
+ VALUE around_open = pair_get_around_open_tokens(pair);
562
+ VALUE around_close = pair_get_around_close_tokens(pair);
563
+ if (fragment_byteno(RARRAY_PTR(around_close)[0]) <=
564
+ fragment_byteno(RARRAY_PTR(around_open)[RARRAY_LEN(around_open)-1])) {
565
+ int len1 = RARRAY_LEN(around_open);
566
+ while (0 < len1 && fragment_byteno(RARRAY_PTR(around_close)[0]) <= fragment_byteno(RARRAY_PTR(around_open)[len1-1]))
567
+ len1--;
568
+ if (len1) {
569
+ report_token_list(pm, around_open, 0, len1, reporter);
570
+ }
571
+ report_token_list(pm, around_close, 0, -1, reporter);
572
+ }
573
+ else {
574
+ report_token_list(pm, around_open, 0, -1, reporter);
575
+ report_token_list(pm, around_close, 0, -1, reporter);
576
+ }
577
+ }
578
+
579
+ static VALUE
580
+ ary_subseq(VALUE ary, int beg, int len)
581
+ {
582
+ VALUE argv[2];
583
+ argv[0] = INT2NUM(beg);
584
+ argv[1] = INT2NUM(len);
585
+ return rb_ary_aref(2, argv, ary);
586
+ }
587
+
588
+ static void
589
+ yield_pair(VALUE pair)
590
+ {
591
+ rb_yield(pair);
592
+ }
593
+
594
+ static VALUE
595
+ pair_before_open_tokens(VALUE pair)
596
+ {
597
+ int before_len;
598
+ VALUE around_open = pair_get_around_open_tokens(pair);
599
+ before_len = pair_get_before_open_len(pair);
600
+ return ary_subseq(around_open, 0, before_len);
601
+ }
602
+
603
+ static VALUE
604
+ pair_around_open(VALUE pair, VALUE index)
605
+ {
606
+ int before_len;
607
+ int i = NUM2INT(index);
608
+ VALUE around_open = pair_get_around_open_tokens(pair);
609
+ before_len = pair_get_before_open_len(pair);
610
+ if (-before_len <= i && i <= pair_get_after_open_len(pair))
611
+ return rb_ary_entry(around_open, before_len+i);
612
+ else
613
+ return Qnil;
614
+ }
615
+
616
+ static VALUE
617
+ pair_open_token(VALUE pair)
618
+ {
619
+ int before_len;
620
+ VALUE around_open = pair_get_around_open_tokens(pair);
621
+ before_len = pair_get_before_open_len(pair);
622
+ return rb_ary_entry(around_open, before_len);
623
+ }
624
+
625
+ static VALUE
626
+ pair_after_open_tokens(VALUE pair)
627
+ {
628
+ int before_len, after_len;
629
+ VALUE around_open = pair_get_around_open_tokens(pair);
630
+ before_len = pair_get_before_open_len(pair);
631
+ after_len = pair_get_after_open_len(pair);
632
+ return ary_subseq(around_open, before_len+1, after_len);
633
+ return ary_subseq(around_open, 0, before_len);
634
+ }
635
+
636
+ static VALUE
637
+ pair_before_close_tokens(VALUE pair)
638
+ {
639
+ int before_len;
640
+ VALUE around_close = pair_get_around_close_tokens(pair);
641
+ if (around_close == Qnil) return Qnil;
642
+ before_len = pair_get_before_close_len(pair);
643
+ return ary_subseq(around_close, 0, before_len);
644
+ }
645
+
646
+ static VALUE
647
+ pair_around_close(VALUE pair, VALUE index)
648
+ {
649
+ int before_len;
650
+ int i = NUM2INT(index);
651
+ VALUE around_close = pair_get_around_close_tokens(pair);
652
+ if (around_close == Qnil) return Qnil;
653
+ before_len = pair_get_before_close_len(pair);
654
+ if (-before_len <= i && i <= pair_get_after_close_len(pair))
655
+ return rb_ary_entry(around_close, before_len+i);
656
+ else
657
+ return Qnil;
658
+ }
659
+
660
+ static VALUE
661
+ pair_close_token(VALUE pair)
662
+ {
663
+ int before_len;
664
+ VALUE around_close = pair_get_around_close_tokens(pair);
665
+ if (around_close == Qnil) return Qnil;
666
+ before_len = pair_get_before_close_len(pair);
667
+ return rb_ary_entry(around_close, before_len);
668
+ }
669
+
670
+ static VALUE
671
+ pair_after_close_tokens(VALUE pair)
672
+ {
673
+ int before_len, after_len;
674
+ VALUE around_close = pair_get_around_close_tokens(pair);
675
+ if (around_close == Qnil) return Qnil;
676
+ before_len = pair_get_before_close_len(pair);
677
+ after_len = pair_get_after_close_len(pair);
678
+ return ary_subseq(around_close, before_len+1, after_len);
679
+ return ary_subseq(around_close, 0, before_len);
680
+ }
681
+
682
+ static VALUE
683
+ pair_before_open_length(VALUE pair)
684
+ {
685
+ return INT2FIX(pair_get_before_open_len(pair));
686
+ }
687
+
688
+ static VALUE
689
+ pair_after_open_length(VALUE pair)
690
+ {
691
+ return INT2FIX(pair_get_after_open_len(pair));
692
+ }
693
+
694
+ static VALUE
695
+ pair_before_close_length(VALUE pair)
696
+ {
697
+ VALUE around_close = pair_get_around_close_tokens(pair);
698
+ if (around_close == Qnil) return Qnil;
699
+ return INT2FIX(pair_get_before_close_len(pair));
700
+ }
701
+
702
+ static VALUE
703
+ pair_after_close_length(VALUE pair)
704
+ {
705
+ VALUE around_close = pair_get_around_close_tokens(pair);
706
+ if (around_close == Qnil) return Qnil;
707
+ return INT2FIX(pair_get_after_close_len(pair));
708
+ }
709
+
710
+ static VALUE
711
+ pair_pair_type(VALUE pair)
712
+ {
713
+ VALUE def = pair_get_pair_def(pair);
714
+ return RARRAY_PTR(def)[4];
715
+ }
716
+
717
+ static void
718
+ check_closed_pairs(pairmatcher_t *pm, VALUE reporter)
719
+ {
720
+ int i, j;
721
+ int after_open_max = pm->after_open_max;
722
+ int after_close_max = pm->after_close_max;
723
+ VALUE pair;
724
+ for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
725
+ pair = RARRAY_PTR(pm->closed_pairs)[i];
726
+ if (pair == Qnil)
727
+ continue;
728
+ if (pair_get_after_open_len(pair) == after_open_max &&
729
+ pair_get_after_close_len(pair) == after_close_max) {
730
+ RARRAY_PTR(pm->closed_pairs)[i] = Qnil;
731
+ yield_pair(pair);
732
+ discard_matched_pair(pm, pair, reporter);
733
+ }
734
+ }
735
+ j = 0;
736
+ for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
737
+ if (RARRAY_PTR(pm->closed_pairs)[i] != Qnil) {
738
+ RARRAY_PTR(pm->closed_pairs)[j] = RARRAY_PTR(pm->closed_pairs)[i];
739
+ j++;
740
+ }
741
+ }
742
+ while (j < RARRAY_LEN(pm->closed_pairs))
743
+ rb_ary_pop(pm->closed_pairs);
744
+ }
745
+
746
+ static void
747
+ put_token(pairmatcher_t *pairmatcher, VALUE token, VALUE reporter)
748
+ {
749
+ VALUE pair_def, tmp_pair_def;
750
+ int depth, max_depth, i;
751
+ add_after_open(pairmatcher, token);
752
+ add_after_close(pairmatcher, token);
753
+ check_closed_pairs(pairmatcher, reporter);
754
+
755
+ if ((pair_def = open_token_p(pairmatcher, token))) {
756
+ put_open_token(pairmatcher, token, pair_def);
757
+ }
758
+ else {
759
+ i = 0;
760
+ max_depth = 0;
761
+ pair_def = Qfalse;
762
+ while ((tmp_pair_def = close_token_p(pairmatcher, token, &i)) != Qfalse) {
763
+ depth = matching_open_depth(pairmatcher, token, tmp_pair_def);
764
+ if (max_depth <= depth) {
765
+ pair_def = tmp_pair_def;
766
+ max_depth = depth;
767
+ }
768
+ i++;
769
+ }
770
+ if (pair_def != Qfalse) {
771
+ put_close_token(pairmatcher, token, pair_def, max_depth, reporter);
772
+ }
773
+ }
774
+ add_recent(pairmatcher, reporter, token);
775
+ }
776
+
777
+ static int
778
+ intertoken_p(pairmatcher_t *pairmatcher, VALUE token_type)
779
+ {
780
+ int i;
781
+ for (i = 0; i < RARRAY_LEN(pairmatcher->intertoken_defs); i++) {
782
+ VALUE def = RARRAY_PTR(pairmatcher->intertoken_defs)[i];
783
+ VALUE def_type = RARRAY_PTR(def)[0];
784
+ //VALUE def_text = RARRAY_PTR(def)[1];
785
+ if (def_type == token_type) {
786
+ return 1;
787
+ }
788
+ }
789
+ return 0;
790
+ }
791
+
792
+ static void
793
+ finish(pairmatcher_t *pm, VALUE reporter)
794
+ {
795
+ int i;
796
+ VALUE pair;
797
+ for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
798
+ pair = RARRAY_PTR(pm->closed_pairs)[i];
799
+ if (pair == Qnil)
800
+ continue;
801
+ RARRAY_PTR(pm->closed_pairs)[i] = Qnil;
802
+ yield_pair(pair);
803
+ discard_matched_pair(pm, pair, reporter);
804
+ }
805
+ while (RARRAY_LEN(pm->pair_stack)) {
806
+ discard_unmatched_pair(pm, reporter);
807
+ }
808
+ report_token_list_now(pm, reporter, pm->recent_tokens, 0, RARRAY_LEN(pm->recent_tokens));
809
+ }
810
+
811
+ static void
812
+ parse(pairmatcher_t *pm, VALUE tokenizer, VALUE reporter)
813
+ {
814
+ VALUE token_info;
815
+ while ((token_info = get_token(tokenizer)) != Qnil) {
816
+ VALUE token_type, token_text, token_lineno, token_byteno;
817
+ VALUE token;
818
+ Check_Type(token_info, T_ARRAY);
819
+ if (RARRAY_LEN(token_info) != 8) {
820
+ rb_raise(rb_eArgError, "unexpected token");
821
+ }
822
+ token_type = RARRAY_PTR(token_info)[0];
823
+ token_text = RARRAY_PTR(token_info)[1];
824
+ token_lineno = RARRAY_PTR(token_info)[2];
825
+ token_byteno = RARRAY_PTR(token_info)[4];
826
+ token = rb_funcall(Fragment, id_new, 4, token_type, token_text, token_lineno, token_byteno);
827
+ if (intertoken_p(pm, token_type)) {
828
+ rb_funcall(reporter, id_call, 1, token);
829
+ }
830
+ else {
831
+ put_token(pm, token, reporter);
832
+ }
833
+ }
834
+ finish(pm, reporter);
835
+ }
836
+
837
+ static VALUE
838
+ pairmatcher_parse(VALUE self, VALUE tokenizer, VALUE reporter)
839
+ {
840
+ pairmatcher_t *pairmatcher;
841
+ GetPM(self, pairmatcher);
842
+ parse(pairmatcher, tokenizer, reporter);
843
+
844
+ return Qnil;
845
+ }
846
+
847
+ void Init_pairmatcher(void)
848
+ {
849
+ VALUE LangScan = rb_const_get(rb_cObject, rb_intern("LangScan"));
850
+ VALUE PairMatcher = rb_define_class_under(LangScan, "PairMatcher", rb_cData);
851
+ Fragment = rb_const_get(LangScan, rb_intern("Fragment"));
852
+ rb_global_variable(&Fragment);
853
+
854
+ id_get_token = rb_intern("get_token");
855
+ id_new = rb_intern("new");
856
+ id_call = rb_intern("call");
857
+
858
+ rb_define_alloc_func(PairMatcher, pairmatcher_s_allocate);
859
+ rb_define_method(PairMatcher, "initialize", pairmatcher_initialize, 4);
860
+ //rb_define_method(PairMatcher, "initialize_copy", pairmatcher_initialize_copy, 1);
861
+ rb_define_method(PairMatcher, "define_intertoken_fragment", pairmatcher_define_intertoken_fragment, 2);
862
+ rb_define_method(PairMatcher, "define_pair", pairmatcher_define_pair, 5);
863
+ rb_define_method(PairMatcher, "before_open_max", pairmatcher_get_before_open_max, 0);
864
+ rb_define_method(PairMatcher, "after_open_max", pairmatcher_get_after_open_max, 0);
865
+ rb_define_method(PairMatcher, "before_close_max", pairmatcher_get_before_close_max, 0);
866
+ rb_define_method(PairMatcher, "after_close_max", pairmatcher_get_after_close_max, 0);
867
+ rb_define_method(PairMatcher, "parse", pairmatcher_parse, 2);
868
+
869
+ Pair = rb_struct_define("LangScanPair",
870
+ "pair_def",
871
+ "before_open_len",
872
+ "around_open_tokens",
873
+ "before_close_len",
874
+ "around_close_tokens",
875
+ "outer",
876
+ NULL);
877
+ rb_define_method(Pair, "before_open_tokens", pair_before_open_tokens, 0);
878
+ rb_define_method(Pair, "before_open_length", pair_before_open_length, 0);
879
+ rb_define_method(Pair, "around_open", pair_around_open, 1);
880
+ rb_define_method(Pair, "open_token", pair_open_token, 0);
881
+ rb_define_method(Pair, "after_open_tokens", pair_after_open_tokens, 0);
882
+ rb_define_method(Pair, "after_open_length", pair_after_open_length, 0);
883
+ rb_define_method(Pair, "before_close_tokens", pair_before_close_tokens, 0);
884
+ rb_define_method(Pair, "before_close_length", pair_before_close_length, 0);
885
+ rb_define_method(Pair, "around_close", pair_around_close, 1);
886
+ rb_define_method(Pair, "close_token", pair_close_token, 0);
887
+ rb_define_method(Pair, "after_close_tokens", pair_after_close_tokens, 0);
888
+ rb_define_method(Pair, "after_close_length", pair_after_close_length, 0);
889
+ rb_define_method(Pair, "pair_type", pair_pair_type, 0);
890
+ }