langscan 1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (168) hide show
  1. data/AUTHORS.txt +19 -0
  2. data/History.txt +126 -0
  3. data/Manifest.txt +167 -0
  4. data/README.rdoc +89 -0
  5. data/Rakefile +40 -0
  6. data/ext/langscan/_make_c.rb +20 -0
  7. data/ext/langscan/_make_h.rb +30 -0
  8. data/ext/langscan/_template.c +134 -0
  9. data/ext/langscan/_template.h +53 -0
  10. data/ext/langscan/c/c/Makefile +157 -0
  11. data/ext/langscan/c/c/c.c +134 -0
  12. data/ext/langscan/c/c/c.h +66 -0
  13. data/ext/langscan/c/c/ctok.c +4622 -0
  14. data/ext/langscan/c/c/ctok.l +212 -0
  15. data/ext/langscan/c/c/extconf.rb +3 -0
  16. data/ext/langscan/c/c/modulename.txt +1 -0
  17. data/ext/langscan/c/c/tokenlist.txt +13 -0
  18. data/ext/langscan/csharp/csharp/Makefile +157 -0
  19. data/ext/langscan/csharp/csharp/csharp.c +134 -0
  20. data/ext/langscan/csharp/csharp/csharp.h +65 -0
  21. data/ext/langscan/csharp/csharp/csharptok.c +2965 -0
  22. data/ext/langscan/csharp/csharp/csharptok.l +200 -0
  23. data/ext/langscan/csharp/csharp/extconf.rb +3 -0
  24. data/ext/langscan/csharp/csharp/modulename.txt +1 -0
  25. data/ext/langscan/csharp/csharp/tokenlist.txt +12 -0
  26. data/ext/langscan/d/d/Makefile +157 -0
  27. data/ext/langscan/d/d/d.c +134 -0
  28. data/ext/langscan/d/d/d.h +64 -0
  29. data/ext/langscan/d/d/dtok.c +5461 -0
  30. data/ext/langscan/d/d/dtok.l +282 -0
  31. data/ext/langscan/d/d/extconf.rb +3 -0
  32. data/ext/langscan/d/d/modulename.txt +1 -0
  33. data/ext/langscan/d/d/tokenlist.txt +11 -0
  34. data/ext/langscan/elisp/elisp/Makefile +157 -0
  35. data/ext/langscan/elisp/elisp/elisp.c +134 -0
  36. data/ext/langscan/elisp/elisp/elisp.h +62 -0
  37. data/ext/langscan/elisp/elisp/elisptok.c +2101 -0
  38. data/ext/langscan/elisp/elisp/elisptok.l +151 -0
  39. data/ext/langscan/elisp/elisp/extconf.rb +3 -0
  40. data/ext/langscan/elisp/elisp/modulename.txt +1 -0
  41. data/ext/langscan/elisp/elisp/tokenlist.txt +9 -0
  42. data/ext/langscan/java/java/Makefile +157 -0
  43. data/ext/langscan/java/java/extconf.rb +3 -0
  44. data/ext/langscan/java/java/java.c +134 -0
  45. data/ext/langscan/java/java/java.h +64 -0
  46. data/ext/langscan/java/java/javatok.c +2090 -0
  47. data/ext/langscan/java/java/javatok.l +155 -0
  48. data/ext/langscan/java/java/modulename.txt +1 -0
  49. data/ext/langscan/java/java/tokenlist.txt +11 -0
  50. data/ext/langscan/javascript/javascript/Makefile +157 -0
  51. data/ext/langscan/javascript/javascript/extconf.rb +3 -0
  52. data/ext/langscan/javascript/javascript/javascript.c +134 -0
  53. data/ext/langscan/javascript/javascript/javascript.h +63 -0
  54. data/ext/langscan/javascript/javascript/javascripttok.c +2051 -0
  55. data/ext/langscan/javascript/javascript/javascripttok.l +147 -0
  56. data/ext/langscan/javascript/javascript/modulename.txt +1 -0
  57. data/ext/langscan/javascript/javascript/tokenlist.txt +10 -0
  58. data/ext/langscan/pairmatcher/pairmatcher/Makefile +157 -0
  59. data/ext/langscan/pairmatcher/pairmatcher/extconf.rb +3 -0
  60. data/ext/langscan/pairmatcher/pairmatcher/pairmatcher.c +890 -0
  61. data/ext/langscan/php/php/Makefile +157 -0
  62. data/ext/langscan/php/php/extconf.rb +3 -0
  63. data/ext/langscan/php/php/modulename.txt +1 -0
  64. data/ext/langscan/php/php/php.c +134 -0
  65. data/ext/langscan/php/php/php.h +64 -0
  66. data/ext/langscan/php/php/phptok.c +2406 -0
  67. data/ext/langscan/php/php/phptok.l +212 -0
  68. data/ext/langscan/php/php/tokenlist.txt +11 -0
  69. data/ext/langscan/post-distclean.rb +21 -0
  70. data/ext/langscan/pre-config.rb +57 -0
  71. data/ext/langscan/python/python/Makefile +157 -0
  72. data/ext/langscan/python/python/extconf.rb +3 -0
  73. data/ext/langscan/python/python/modulename.txt +1 -0
  74. data/ext/langscan/python/python/python.c +134 -0
  75. data/ext/langscan/python/python/python.h +61 -0
  76. data/ext/langscan/python/python/pythontok.c +2102 -0
  77. data/ext/langscan/python/python/pythontok.l +155 -0
  78. data/ext/langscan/python/python/tokenlist.txt +8 -0
  79. data/ext/langscan/ruby/compat/ripper/Makefile +158 -0
  80. data/ext/langscan/ruby/compat/ripper/depend +1 -0
  81. data/ext/langscan/ruby/compat/ripper/extconf.rb +4 -0
  82. data/ext/langscan/ruby/compat/ripper/include/eventids1.c +251 -0
  83. data/ext/langscan/ruby/compat/ripper/include/eventids2.c +277 -0
  84. data/ext/langscan/ruby/compat/ripper/include/lex.c +138 -0
  85. data/ext/langscan/ruby/compat/ripper/ripper.c +14420 -0
  86. data/ext/langscan/scheme/scheme/Makefile +157 -0
  87. data/ext/langscan/scheme/scheme/extconf.rb +3 -0
  88. data/ext/langscan/scheme/scheme/modulename.txt +1 -0
  89. data/ext/langscan/scheme/scheme/scheme.c +134 -0
  90. data/ext/langscan/scheme/scheme/scheme.h +60 -0
  91. data/ext/langscan/scheme/scheme/schemetok.c +2447 -0
  92. data/ext/langscan/scheme/scheme/schemetok.l +177 -0
  93. data/ext/langscan/scheme/scheme/tokenlist.txt +7 -0
  94. data/ext/langscan/sh/sh/Makefile +157 -0
  95. data/ext/langscan/sh/sh/extconf.rb +3 -0
  96. data/ext/langscan/sh/sh/modulename.txt +1 -0
  97. data/ext/langscan/sh/sh/sh.c +134 -0
  98. data/ext/langscan/sh/sh/sh.h +61 -0
  99. data/ext/langscan/sh/sh/shtok.c +2470 -0
  100. data/ext/langscan/sh/sh/shtok.l +325 -0
  101. data/ext/langscan/sh/sh/tokenlist.txt +8 -0
  102. data/lib/langscan.rb +124 -0
  103. data/lib/langscan/_common.rb +50 -0
  104. data/lib/langscan/_easyscanner.rb +78 -0
  105. data/lib/langscan/_pairmatcher.rb +46 -0
  106. data/lib/langscan/_type.rb +125 -0
  107. data/lib/langscan/autoconf.rb +51 -0
  108. data/lib/langscan/automake.rb +51 -0
  109. data/lib/langscan/brainfuck.rb +48 -0
  110. data/lib/langscan/c.rb +144 -0
  111. data/lib/langscan/csharp.rb +101 -0
  112. data/lib/langscan/css.rb +109 -0
  113. data/lib/langscan/d.rb +201 -0
  114. data/lib/langscan/eiffel.rb +167 -0
  115. data/lib/langscan/elisp.rb +132 -0
  116. data/lib/langscan/io.rb +84 -0
  117. data/lib/langscan/java.rb +95 -0
  118. data/lib/langscan/javascript.rb +97 -0
  119. data/lib/langscan/lua.rb +116 -0
  120. data/lib/langscan/ocaml.rb +298 -0
  121. data/lib/langscan/ocaml/camlexer.ml +28 -0
  122. data/lib/langscan/ocaml/lexer.mll +230 -0
  123. data/lib/langscan/ocaml/types.ml +36 -0
  124. data/lib/langscan/perl.rb +87 -0
  125. data/lib/langscan/perl/tokenizer.pl +231 -0
  126. data/lib/langscan/php.rb +80 -0
  127. data/lib/langscan/python.rb +101 -0
  128. data/lib/langscan/rpmspec.rb +71 -0
  129. data/lib/langscan/ruby.rb +164 -0
  130. data/lib/langscan/ruby/compat/README +5 -0
  131. data/lib/langscan/ruby/compat/ripper.rb +4 -0
  132. data/lib/langscan/ruby/compat/ripper/core.rb +918 -0
  133. data/lib/langscan/ruby/compat/ripper/filter.rb +70 -0
  134. data/lib/langscan/ruby/compat/ripper/lexer.rb +179 -0
  135. data/lib/langscan/ruby/compat/ripper/sexp.rb +100 -0
  136. data/lib/langscan/scheme.rb +160 -0
  137. data/lib/langscan/sh.rb +116 -0
  138. data/lib/langscan/text.rb +37 -0
  139. data/metaconfig +2 -0
  140. data/script/console +10 -0
  141. data/script/destroy +14 -0
  142. data/script/generate +14 -0
  143. data/script/makemanifest.rb +21 -0
  144. data/setup.rb +1604 -0
  145. data/tasks/extconf.rake +13 -0
  146. data/tasks/extconf/langscan.rake +42 -0
  147. data/test/langscan/brainfuck/test/test_scan.rb +55 -0
  148. data/test/langscan/c/test/test_scan.rb +216 -0
  149. data/test/langscan/c/test/test_token.rb +41 -0
  150. data/test/langscan/csharp/test/test_scan.rb +157 -0
  151. data/test/langscan/css/test/test_css.rb +79 -0
  152. data/test/langscan/d/test/test_scan.rb +233 -0
  153. data/test/langscan/d/test/test_token.rb +205 -0
  154. data/test/langscan/eiffel/test/test_eiffel.rb +95 -0
  155. data/test/langscan/elisp/test/test_elisp.rb +177 -0
  156. data/test/langscan/io/test/test_io.rb +79 -0
  157. data/test/langscan/java/test/test_java.rb +74 -0
  158. data/test/langscan/javascript/test/test_javascript.rb +39 -0
  159. data/test/langscan/lua/test/test_lua.rb +69 -0
  160. data/test/langscan/ocaml/test/test_ocaml.rb +161 -0
  161. data/test/langscan/php/test/test_scan.rb +138 -0
  162. data/test/langscan/python/test/test_scan.rb +105 -0
  163. data/test/langscan/rpmspec/test/test_rpmspec.rb +51 -0
  164. data/test/langscan/ruby/test/test_scan.rb +71 -0
  165. data/test/langscan/scheme/test/test_scan.rb +198 -0
  166. data/test/test_helper.rb +7 -0
  167. data/test/test_langscan.rb +123 -0
  168. metadata +296 -0
@@ -0,0 +1,147 @@
1
+ /*
2
+ * javascript.l - a lex rule for JavaScript
3
+ *
4
+ * Copyright (C) 2005 Keisuke Nishida <knishida@open-cobol.org>
5
+ * All rights reserved.
6
+ * This is free software with ABSOLUTELY NO WARRANTY.
7
+ *
8
+ * You can redistribute it and/or modify it under the terms of
9
+ * the GNU General Public License version 2.
10
+ */
11
+
%option reentrant
%option prefix="langscan_javascript_lex_"
%option noyywrap
%option nodefault

/* Building blocks for the block-comment pattern used in the rules below. */
slash           \/
star            \*
nonstar         [^\*]
nonslashstar    [^\/\*]
commentcontent  {star}+{nonslashstar}{nonstar}*
comment         {slash}{star}{nonstar}*{commentcontent}*{star}+{slash}

%{

#include "javascript.h"

/* Per-scanner state; reachable inside the scanner as yyextra. */
#define YY_EXTRA_TYPE langscan_javascript_lex_extra_t *

/* The scanner's end-of-input return value must be 0. */
#if YY_NULL != 0
#error "YY_NULL is not 0."
#endif

#define YY_DECL langscan_javascript_token_t langscan_javascript_lex_lex(yyscan_t yyscanner)

/*
 * Refill the scanner buffer through the caller-supplied read callback.
 * The first zero-length read latches yyextra->eof; from then on the callback
 * is no longer invoked and `result` is explicitly set to YY_NULL, so it is
 * never left holding whatever value it had before the macro ran.
 */
#define YY_INPUT(buf,result,max_size) \
  if (!yyextra->eof) { \
    result = yyextra->user_read(&(yyextra->user_data), (buf), (max_size)); \
    if (result == 0) \
      yyextra->eof = 1; \
  } \
  else { \
    result = YY_NULL; \
  }

/* Advance the begin/end position counters over the text just matched. */
#define UPD update_pos(yyextra, yytext, yyleng)
static void update_pos(langscan_javascript_lex_extra_t *, char *, int);

/*
 * Record the matched text and its length in the extra state, then return
 * the token constant langscan_javascript_<token> from the scanner.
 */
#define report(token) \
  do { \
    yyextra->text = yytext; \
    yyextra->leng = yyleng; \
    return langscan_javascript_##token; \
  } while (0)

%}

%%
[ \t\f\r]+              { UPD; report(space); }
\n                      { UPD; report(space); }
"//".*                  { UPD; report(comment); }
{comment}               { UPD; report(comment); }
\"([^\\\"]|\\.)*\"      { UPD; report(string); }
[A-Za-z_][0-9A-Za-z_]*  { UPD; report(ident); }
.                       { UPD; report(punct); }

%%
65
+
66
+ static void update_pos(
67
+ langscan_javascript_lex_extra_t *extra,
68
+ char *text,
69
+ int leng)
70
+ {
71
+ int i, j;
72
+ extra->beg_byteno = extra->end_byteno;
73
+ extra->beg_lineno = extra->end_lineno;
74
+ extra->beg_columnno = extra->end_columnno;
75
+ j = 0;
76
+ for (i = 0; i < leng; i++) {
77
+ if (text[i] == '\n') {
78
+ extra->end_lineno++;
79
+ j = i + 1;
80
+ extra->end_columnno = 0;
81
+ }
82
+ }
83
+ extra->end_columnno += leng - j;
84
+ extra->end_byteno += leng;
85
+ }
86
+
87
+ langscan_javascript_tokenizer_t *langscan_javascript_make_tokenizer(
88
+ size_t (*user_read)(void **user_data_p, char *buf, size_t maxlen),
89
+ void *user_data)
90
+ {
91
+ langscan_javascript_tokenizer_t *tokenizer;
92
+ langscan_javascript_lex_extra_t *extra;
93
+ tokenizer = (langscan_javascript_tokenizer_t *)malloc(sizeof(langscan_javascript_tokenizer_t));
94
+ if (tokenizer == NULL)
95
+ return NULL;
96
+ extra = (langscan_javascript_lex_extra_t *)malloc(sizeof(langscan_javascript_lex_extra_t));
97
+ if (extra == NULL)
98
+ return NULL;
99
+ extra->user_read = user_read;
100
+ extra->user_data = user_data;
101
+ extra->beg_lineno = 1;
102
+ extra->beg_columnno = 0;
103
+ extra->beg_byteno = 0;
104
+ extra->end_lineno = 1;
105
+ extra->end_columnno = 0;
106
+ extra->end_byteno = 0;
107
+ extra->eof = 0;
108
+ tokenizer->extra = extra;
109
+ langscan_javascript_lex_lex_init(&tokenizer->scanner);
110
+ langscan_javascript_lex_set_extra(extra, tokenizer->scanner);
111
+ return tokenizer;
112
+ }
113
+
114
+ langscan_javascript_token_t langscan_javascript_get_token(langscan_javascript_tokenizer_t *tokenizer)
115
+ {
116
+ return langscan_javascript_lex_lex(tokenizer->scanner);
117
+ }
118
+
119
+ void langscan_javascript_free_tokenizer(langscan_javascript_tokenizer_t *tokenizer)
120
+ {
121
+ langscan_javascript_lex_extra_t *extra = langscan_javascript_lex_get_extra(tokenizer->scanner);
122
+ free((void *)extra);
123
+ langscan_javascript_lex_lex_destroy(tokenizer->scanner);
124
+ free((void *)tokenizer);
125
+ }
126
+
127
+ user_read_t langscan_javascript_tokenizer_get_user_read(langscan_javascript_tokenizer_t *tokenizer)
128
+ {
129
+ return tokenizer->extra->user_read;
130
+ }
131
+
132
+ void *langscan_javascript_tokenizer_get_user_data(langscan_javascript_tokenizer_t *tokenizer)
133
+ {
134
+ return tokenizer->extra->user_data;
135
+ }
136
+
137
+ const char *langscan_javascript_token_name(langscan_javascript_token_t token)
138
+ {
139
+ static char *token_names[] = {
140
+ "*eof*",
141
+ #define LANGSCAN_JAVASCRIPT_TOKEN(name) #name,
142
+ LANGSCAN_JAVASCRIPT_TOKEN_LIST
143
+ #undef LANGSCAN_JAVASCRIPT_TOKEN
144
+ };
145
+
146
+ return token_names[token];
147
+ }
@@ -0,0 +1 @@
1
+ JavaScript
@@ -0,0 +1,10 @@
1
+ preproc_beg
2
+ preproc_end
3
+ character
4
+ integer
5
+ floating
6
+ string
7
+ ident
8
+ punct
9
+ comment
10
+ space
@@ -0,0 +1,157 @@
1
+
2
+ SHELL = /bin/sh
3
+
4
+ #### Start of system configuration section. ####
5
+
6
+ srcdir = /Users/osuka/devel/git/langscan/ext/langscan/pairmatcher/pairmatcher
7
+ topdir = /System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin10.0
8
+ hdrdir = $(topdir)
9
+ VPATH = $(srcdir):$(topdir):$(hdrdir)
10
+ exec_prefix = $(prefix)
11
+ prefix = $(DESTDIR)/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr
12
+ sharedstatedir = $(prefix)/com
13
+ mandir = $(DESTDIR)/usr/share/man
14
+ psdir = $(docdir)
15
+ oldincludedir = $(DESTDIR)/usr/include
16
+ localedir = $(datarootdir)/locale
17
+ bindir = $(exec_prefix)/bin
18
+ libexecdir = $(exec_prefix)/libexec
19
+ sitedir = $(DESTDIR)/Library/Ruby/Site
20
+ htmldir = $(docdir)
21
+ vendorarchdir = $(vendorlibdir)/$(sitearch)
22
+ includedir = $(prefix)/include
23
+ infodir = $(DESTDIR)/usr/share/info
24
+ vendorlibdir = $(vendordir)/$(ruby_version)
25
+ sysconfdir = $(prefix)/etc
26
+ libdir = $(exec_prefix)/lib
27
+ sbindir = $(exec_prefix)/sbin
28
+ rubylibdir = $(libdir)/ruby/$(ruby_version)
29
+ docdir = $(datarootdir)/doc/$(PACKAGE)
30
+ dvidir = $(docdir)
31
+ vendordir = $(libdir)/ruby/vendor_ruby
32
+ datarootdir = $(prefix)/share
33
+ pdfdir = $(docdir)
34
+ archdir = $(rubylibdir)/$(arch)
35
+ sitearchdir = $(sitelibdir)/$(sitearch)
36
+ datadir = $(datarootdir)
37
+ localstatedir = $(prefix)/var
38
+ sitelibdir = $(sitedir)/$(ruby_version)
39
+
40
+ CC = gcc
41
+ LIBRUBY = $(LIBRUBY_SO)
42
+ LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
43
+ LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
44
+ LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)
45
+
46
+ RUBY_EXTCONF_H =
47
+ CFLAGS = -fno-common -arch i386 -arch x86_64 -g -Os -pipe -fno-common -DENABLE_DTRACE -fno-common -pipe -fno-common $(cflags)
48
+ INCFLAGS = -I. -I$(topdir) -I$(hdrdir) -I$(srcdir)
49
+ DEFS =
50
+ CPPFLAGS = -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE $(DEFS) $(cppflags)
51
+ CXXFLAGS = $(CFLAGS)
52
+ ldflags = -L. -arch i386 -arch x86_64
53
+ dldflags =
54
+ archflag =
55
+ DLDFLAGS = $(ldflags) $(dldflags) $(archflag)
56
+ LDSHARED = cc -arch i386 -arch x86_64 -pipe -bundle -undefined dynamic_lookup
57
+ AR = ar
58
+ EXEEXT =
59
+
60
+ RUBY_INSTALL_NAME = ruby
61
+ RUBY_SO_NAME = ruby
62
+ arch = universal-darwin10.0
63
+ sitearch = universal-darwin10.0
64
+ ruby_version = 1.8
65
+ ruby = /System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/bin/ruby
66
+ RUBY = $(ruby)
67
+ RM = rm -f
68
+ MAKEDIRS = mkdir -p
69
+ INSTALL = /usr/bin/install -c
70
+ INSTALL_PROG = $(INSTALL) -m 0755
71
+ INSTALL_DATA = $(INSTALL) -m 644
72
+ COPY = cp
73
+
74
+ #### End of system configuration section. ####
75
+
76
+ preload =
77
+
78
+ libpath = . $(libdir)
79
+ LIBPATH = -L. -L$(libdir)
80
+ DEFFILE =
81
+
82
+ CLEANFILES = mkmf.log
83
+ DISTCLEANFILES =
84
+
85
+ extout =
86
+ extout_prefix =
87
+ target_prefix = /langscan/pairmatcher
88
+ LOCAL_LIBS =
89
+ LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl
90
+ SRCS = pairmatcher.c
91
+ OBJS = pairmatcher.o
92
+ TARGET = pairmatcher
93
+ DLLIB = $(TARGET).bundle
94
+ EXTSTATIC =
95
+ STATIC_LIB =
96
+
97
+ BINDIR = $(bindir)
98
+ RUBYCOMMONDIR = $(sitedir)$(target_prefix)
99
+ RUBYLIBDIR = $(sitelibdir)$(target_prefix)
100
+ RUBYARCHDIR = $(sitearchdir)$(target_prefix)
101
+
102
+ TARGET_SO = $(DLLIB)
103
+ CLEANLIBS = $(TARGET).bundle $(TARGET).il? $(TARGET).tds $(TARGET).map
104
+ CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
105
+
106
+ all: $(DLLIB)
107
+ static: $(STATIC_LIB)
108
+
109
+ clean:
110
+ @-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
111
+
112
+ distclean: clean
113
+ @-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
114
+ @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
115
+
116
+ realclean: distclean
117
+ install: install-so install-rb
118
+
119
+ install-so: $(RUBYARCHDIR)
120
+ install-so: $(RUBYARCHDIR)/$(DLLIB)
121
+ $(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
122
+ $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
123
+ install-rb: pre-install-rb install-rb-default
124
+ install-rb-default: pre-install-rb-default
125
+ pre-install-rb: Makefile
126
+ pre-install-rb-default: Makefile
127
+ $(RUBYARCHDIR):
128
+ $(MAKEDIRS) $@
129
+
130
+ site-install: site-install-so site-install-rb
131
+ site-install-so: install-so
132
+ site-install-rb: install-rb
133
+
134
+ .SUFFIXES: .c .m .cc .cxx .cpp .C .o
135
+
136
+ .cc.o:
137
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
138
+
139
+ .cxx.o:
140
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
141
+
142
+ .cpp.o:
143
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
144
+
145
+ .C.o:
146
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
147
+
148
+ .c.o:
149
+ $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $<
150
+
151
+ $(DLLIB): $(OBJS) Makefile
152
+ @-$(RM) $@
153
+ $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
154
+
155
+
156
+
157
+ $(OBJS): ruby.h defines.h
@@ -0,0 +1,3 @@
# Build configuration for the pairmatcher extension: writes a Makefile whose
# target is langscan/pairmatcher/pairmatcher.
require "mkmf"

create_makefile("langscan/pairmatcher/pairmatcher")
@@ -0,0 +1,890 @@
1
+ /*
2
+ * pairmatcher.c - a pair matching parser
3
+ *
4
+ * Copyright (C) 2005 Akira Tanaka <akr@m17n.org>
5
+ * All rights reserved.
6
+ * This is free software with ABSOLUTELY NO WARRANTY.
7
+ *
8
+ * You can redistribute it and/or modify it under the terms of
9
+ * the GNU General Public License version 2.
10
+ */
11
+
12
+ #include <ruby.h>
13
+
14
+ static ID id_get_token, id_new, id_call;
15
+ static VALUE Fragment;
16
+
17
+ #ifndef RSTRUCT_PTR
18
+ # define RSTRUCT_PTR(st) (RSTRUCT(st)->ptr)
19
+ #endif
20
+ #ifndef RSTRUCT_LEN
21
+ # define RSTRUCT_LEN(st) (RSTRUCT(st)->len)
22
+ #endif
23
+
24
+ #ifndef RARRAY_PTR
25
+ # define RARRAY_PTR(str) (RARRAY(str)->ptr)
26
+ #endif
27
+ #ifndef RARRAY_LEN
28
+ # define RARRAY_LEN(str) (RARRAY(str)->len)
29
+ #endif
30
+
31
+ #ifdef SYMBOL_P
32
+ # define Check_Symbol(val) do { if (!SYMBOL_P(val)) { Check_Type(val, T_SYMBOL); } } while (0)
33
+ #else
34
+ # define Check_Symbol(val) Check_Type(val, T_SYMBOL)
35
+ #endif
36
+
37
+ static VALUE
38
+ fragment_type(VALUE fragment)
39
+ {
40
+ VALUE val;
41
+ Check_Type(fragment, T_STRUCT);
42
+ if (RBASIC(fragment)->klass != Fragment) {
43
+ rb_raise(rb_eTypeError, "not fragment");
44
+ }
45
+ val = RSTRUCT_PTR(fragment)[0];
46
+ Check_Symbol(val);
47
+ return val;
48
+ }
49
+
50
+ static VALUE
51
+ fragment_text(VALUE fragment)
52
+ {
53
+ VALUE val;
54
+ Check_Type(fragment, T_STRUCT);
55
+ if (RBASIC(fragment)->klass != Fragment) {
56
+ rb_raise(rb_eTypeError, "not fragment");
57
+ }
58
+ val = RSTRUCT_PTR(fragment)[1];
59
+ StringValue(val);
60
+ return val;
61
+ }
62
+
63
+ static int
64
+ fragment_byteno(VALUE fragment)
65
+ {
66
+ VALUE val;
67
+ Check_Type(fragment, T_STRUCT);
68
+ if (RBASIC(fragment)->klass != Fragment) {
69
+ rb_raise(rb_eTypeError, "not fragment");
70
+ }
71
+ val = RSTRUCT_PTR(fragment)[3];
72
+ return NUM2INT(val);
73
+ }
74
+
75
+ typedef struct {
76
+ unsigned char before_open_max;
77
+ unsigned char after_open_max;
78
+ unsigned char before_close_max;
79
+ unsigned char after_close_max;
80
+ VALUE pair_defs;
81
+ VALUE intertoken_defs;
82
+ VALUE recent_tokens;
83
+ VALUE pair_stack;
84
+ VALUE closed_pairs;
85
+ } pairmatcher_t;
86
+
87
+ static void pairmatcher_mark(pairmatcher_t *pairmatcher)
88
+ {
89
+ if (pairmatcher == NULL)
90
+ return;
91
+ rb_gc_mark(pairmatcher->pair_defs);
92
+ rb_gc_mark(pairmatcher->intertoken_defs);
93
+ rb_gc_mark(pairmatcher->recent_tokens);
94
+ rb_gc_mark(pairmatcher->pair_stack);
95
+ rb_gc_mark(pairmatcher->closed_pairs);
96
+ }
97
+
98
+ static void pairmatcher_free(pairmatcher_t *pairmatcher)
99
+ {
100
+ if (pairmatcher == NULL)
101
+ return;
102
+ free((void *)pairmatcher);
103
+ }
104
+
105
+ static VALUE pairmatcher_s_allocate(VALUE klass)
106
+ {
107
+ return Data_Wrap_Struct(klass, pairmatcher_mark, pairmatcher_free, NULL);
108
+ }
109
+
110
+ static VALUE pairmatcher_initialize(
111
+ VALUE self,
112
+ VALUE before_open_max,
113
+ VALUE after_open_max,
114
+ VALUE before_close_max,
115
+ VALUE after_close_max)
116
+ {
117
+ pairmatcher_t *pairmatcher;
118
+
119
+ Data_Get_Struct(self, pairmatcher_t, pairmatcher);
120
+ if (pairmatcher != NULL) { rb_raise(rb_eArgError, "called twice"); }
121
+
122
+ pairmatcher = ALLOC(pairmatcher_t);
123
+ pairmatcher->pair_defs = Qnil;
124
+ pairmatcher->intertoken_defs = Qnil;
125
+ pairmatcher->recent_tokens = Qnil;
126
+ pairmatcher->pair_stack = Qnil;
127
+ pairmatcher->closed_pairs = Qnil;
128
+ DATA_PTR(self) = pairmatcher;
129
+
130
+ pairmatcher->before_open_max = NUM2INT(before_open_max);
131
+ pairmatcher->after_open_max = NUM2INT(after_open_max);
132
+ pairmatcher->before_close_max = NUM2INT(before_close_max);
133
+ pairmatcher->after_close_max = NUM2INT(after_close_max);
134
+ pairmatcher->pair_defs = rb_ary_new();
135
+ //RBASIC(pairmatcher->pair_defs)->klass = 0;
136
+ pairmatcher->intertoken_defs = rb_ary_new();
137
+ //RBASIC(pairmatcher->intertoken_defs)->klass = 0;
138
+ pairmatcher->recent_tokens = rb_ary_new();
139
+ //RBASIC(pairmatcher->recent_tokens)->klass = 0;
140
+ pairmatcher->pair_stack = rb_ary_new();
141
+ //RBASIC(pairmatcher->pair_stack)->klass = 0;
142
+ pairmatcher->closed_pairs = rb_ary_new();
143
+ //RBASIC(pairmatcher->closed_pairs)->klass = 0;
144
+ return self;
145
+ }
146
+
/*
 * GetPM(obj, var): load the pairmatcher_t attached to `obj` into `var`.
 * Raises ArgumentError ("not initialized") when no state is attached.
 */
#define GetPM(obj, var) \
  do { \
    Data_Get_Struct((obj), pairmatcher_t, (var)); \
    if (!(var)) { \
      rb_raise(rb_eArgError, "not initialized"); \
    } \
  } while (0)
152
+
153
+ static VALUE
154
+ pairmatcher_get_before_open_max(VALUE self)
155
+ {
156
+ pairmatcher_t *pairmatcher;
157
+ GetPM(self, pairmatcher);
158
+ if (pairmatcher == NULL) { rb_raise(rb_eArgError, "not initialized"); }
159
+ return INT2NUM(pairmatcher->before_open_max);
160
+ }
161
+
162
+ static VALUE
163
+ pairmatcher_get_after_open_max(VALUE self)
164
+ {
165
+ pairmatcher_t *pairmatcher;
166
+ GetPM(self, pairmatcher);
167
+ return INT2NUM(pairmatcher->after_open_max);
168
+ }
169
+
170
+ static VALUE
171
+ pairmatcher_get_before_close_max(VALUE self)
172
+ {
173
+ pairmatcher_t *pairmatcher;
174
+ GetPM(self, pairmatcher);
175
+ return INT2NUM(pairmatcher->before_close_max);
176
+ }
177
+
178
+ static VALUE
179
+ pairmatcher_get_after_close_max(VALUE self)
180
+ {
181
+ pairmatcher_t *pairmatcher;
182
+ GetPM(self, pairmatcher);
183
+ return INT2NUM(pairmatcher->after_close_max);
184
+ }
185
+
186
+ static VALUE
187
+ pairmatcher_define_pair(VALUE self, VALUE pair_type, VALUE open_type, VALUE open_text, VALUE close_type, VALUE close_text)
188
+ {
189
+ pairmatcher_t *pairmatcher;
190
+ VALUE def;
191
+ Check_Symbol(open_type);
192
+ if (open_text != Qnil) {
193
+ StringValue(open_text);
194
+ open_text = rb_str_new4(open_text);
195
+ }
196
+ Check_Symbol(close_type);
197
+ if (close_text != Qnil) {
198
+ StringValue(close_text);
199
+ close_text = rb_str_new4(close_text);
200
+ }
201
+
202
+ def = rb_ary_new3(5, open_type, open_text, close_type, close_text, pair_type);
203
+ //RBASIC(def)->klass = 0;
204
+
205
+ GetPM(self, pairmatcher);
206
+ rb_ary_push(pairmatcher->pair_defs, def);
207
+
208
+ return Qnil;
209
+ }
210
+
211
+ static VALUE
212
+ pairmatcher_define_intertoken_fragment(VALUE self, VALUE type, VALUE text)
213
+ {
214
+ pairmatcher_t *pairmatcher;
215
+ VALUE def;
216
+ Check_Symbol(type);
217
+ if (text != Qnil) {
218
+ StringValue(text);
219
+ text = rb_str_new4(text);
220
+ }
221
+
222
+ def = rb_ary_new3(2, type, text);
223
+ //RBASIC(def)->klass = 0;
224
+
225
+ GetPM(self, pairmatcher);
226
+ rb_ary_push(pairmatcher->intertoken_defs, def);
227
+
228
+ return Qnil;
229
+ }
230
+
231
+ static VALUE
232
+ get_token(VALUE tokenizer)
233
+ {
234
+ return rb_funcall(tokenizer, id_get_token, 0);
235
+ }
236
+
237
+ static VALUE
238
+ open_token_p(pairmatcher_t *pairmatcher, VALUE token)
239
+ {
240
+ int i;
241
+ VALUE type = fragment_type(token);
242
+ VALUE text = fragment_text(token);
243
+ for (i = 0; i < RARRAY_LEN(pairmatcher->pair_defs); i++) {
244
+ VALUE def = RARRAY_PTR(pairmatcher->pair_defs)[i];
245
+ VALUE open_type = RARRAY_PTR(def)[0];
246
+ VALUE open_text = RARRAY_PTR(def)[1];
247
+ if (open_type == Qnil || open_type == type) {
248
+ if (open_text == Qnil || rb_str_cmp(open_text, text) == 0) {
249
+ return def;
250
+ }
251
+ }
252
+ }
253
+ return Qfalse;
254
+ }
255
+
256
+ static VALUE
257
+ close_token_p(pairmatcher_t *pairmatcher, VALUE token, int *i)
258
+ {
259
+ VALUE type = fragment_type(token);
260
+ VALUE text = fragment_text(token);
261
+ for (; *i < RARRAY_LEN(pairmatcher->pair_defs); (*i)++) {
262
+ VALUE def = RARRAY_PTR(pairmatcher->pair_defs)[*i];
263
+ VALUE close_type = RARRAY_PTR(def)[2];
264
+ VALUE close_text = RARRAY_PTR(def)[3];
265
+ if (close_type == Qnil || close_type == type) {
266
+ if (close_text == Qnil || rb_str_cmp(close_text, text) == 0) {
267
+ return def;
268
+ }
269
+ }
270
+ }
271
+ return Qfalse;
272
+ }
273
+
274
+ #define pair_get_pair_def(pair) (RSTRUCT_PTR(pair)[0])
275
+ #define pair_get_before_open_len(pair) FIX2INT(RSTRUCT_PTR(pair)[1])
276
+ #define pair_get_around_open_tokens(pair) (RSTRUCT_PTR(pair)[2])
277
+ #define pair_get_before_close_len(pair) FIX2INT(RSTRUCT_PTR(pair)[3])
278
+ #define pair_get_around_close_tokens(pair) (RSTRUCT_PTR(pair)[4])
279
+ #define pair_get_outer(pair) (RSTRUCT_PTR(pair)[5])
280
+ #define pair_set_pair_def(pair, val) (RSTRUCT_PTR(pair)[0] = (val))
281
+ #define pair_set_before_open_len(pair, len) (RSTRUCT_PTR(pair)[1] = INT2FIX(len))
282
+ #define pair_set_around_open_tokens(pair, val) (RSTRUCT_PTR(pair)[2] = (val))
283
+ #define pair_set_before_close_len(pair, len) (RSTRUCT_PTR(pair)[3] = INT2FIX(len))
284
+ #define pair_set_around_close_tokens(pair, val) (RSTRUCT_PTR(pair)[4] = (val))
285
+
286
+ #define pair_get_after_open_len(pair) (RARRAY_LEN(pair_get_around_open_tokens(pair))-pair_get_before_open_len(pair)-1)
287
+ #define pair_get_after_close_len(pair) (RARRAY_LEN(pair_get_around_close_tokens(pair))-pair_get_before_close_len(pair)-1)
288
+
289
+ static VALUE Pair;
290
+
291
+ static VALUE
292
+ make_pair(VALUE pair_def, int before_open_len, VALUE around_open_tokens, VALUE outer)
293
+ {
294
+ VALUE pair = rb_struct_new(Pair,
295
+ pair_def,
296
+ INT2FIX(before_open_len),
297
+ around_open_tokens,
298
+ Qnil,
299
+ Qnil,
300
+ outer);
301
+ return pair;
302
+ }
303
+
304
+ static int
305
+ concat_recent_tokens(pairmatcher_t *pm, int max, VALUE ary)
306
+ {
307
+ int i;
308
+ if (RARRAY_LEN(pm->recent_tokens) <= max)
309
+ max = RARRAY_LEN(pm->recent_tokens);
310
+ for (i = 0; i < max; i++) {
311
+ rb_ary_push(ary, RARRAY_PTR(pm->recent_tokens)[RARRAY_LEN(pm->recent_tokens)-max+i]);
312
+ }
313
+ return max;
314
+ }
315
+
316
+ static void
317
+ put_open_token(pairmatcher_t *pm, VALUE open_token, VALUE pair_def)
318
+ {
319
+ int before_open_len, stack_len;
320
+ VALUE pair;
321
+ VALUE around_open_tokens;
322
+ around_open_tokens = rb_ary_new2(pm->before_open_max+1+pm->after_open_max);
323
+ before_open_len = concat_recent_tokens(pm, pm->before_open_max, around_open_tokens);
324
+ rb_ary_push(around_open_tokens, open_token);
325
+ stack_len = RARRAY_LEN(pm->pair_stack);
326
+ pair = make_pair(pair_def, before_open_len, around_open_tokens,
327
+ stack_len ? RARRAY_PTR(pm->pair_stack)[stack_len-1] : Qnil);
328
+ rb_ary_push(pm->pair_stack, pair);
329
+ }
330
+
331
+ static int
332
+ matching_open_depth(pairmatcher_t *pm, VALUE open_token, VALUE pair_def)
333
+ {
334
+ int i;
335
+ for (i = RARRAY_LEN(pm->pair_stack) - 1; 0 <= i; i--) {
336
+ if (pair_get_pair_def(RARRAY_PTR(pm->pair_stack)[i]) == pair_def) {
337
+ return i;
338
+ }
339
+ }
340
+ return -1;
341
+ }
342
+
343
+ static void
344
+ report_token_list_now(pairmatcher_t *pm, VALUE reporter, VALUE token_list, int beg, int len)
345
+ {
346
+ while (len) {
347
+ rb_funcall(reporter, id_call, 1, RARRAY_PTR(token_list)[beg]);
348
+ beg++;
349
+ len--;
350
+ }
351
+ }
352
+
353
+ static void
354
+ report_token_list_open_pair(pairmatcher_t *pm, VALUE reporter, VALUE token_list, int beg, int len)
355
+ {
356
+ int i;
357
+ for (i = RARRAY_LEN(pm->pair_stack) - 1; 0 <= i; i--) {
358
+ VALUE pair = RARRAY_PTR(pm->pair_stack)[i];
359
+ VALUE around_open = pair_get_around_open_tokens(pair);
360
+ int first_byteno = fragment_byteno(RARRAY_PTR(around_open)[0]);
361
+ int last_byteno = fragment_byteno(RARRAY_PTR(around_open)[RARRAY_LEN(around_open)-1]);
362
+ if (last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg])) {
363
+ report_token_list_now(pm, reporter, token_list, beg, len);
364
+ return;
365
+ }
366
+ /* last_byteno >= fragment_byteno(RARRAY_PTR(token_list)[beg]) */
367
+ if (fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) < first_byteno)
368
+ continue;
369
+ /*
370
+ * fragment_byteno(RARRAY_PTR(token_list)[beg]) <= last_byteno
371
+ * first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1])
372
+ */
373
+ if (last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg+len-1])) {
374
+ int beg2 = beg+len-1;
375
+ int len2 = 1;
376
+ while (beg <= beg2 && last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg2-1])) {
377
+ beg2--;
378
+ len2++;
379
+ }
380
+ report_token_list_now(pm, reporter, token_list, beg2, len2);
381
+ len -= len2;
382
+ }
383
+ /*
384
+ * first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) <= last_byteno
385
+ * fragment_byteno(RARRAY_PTR(token_list)[beg]) <= last_byteno
386
+ */
387
+ while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1])) {
388
+ len--;
389
+ }
390
+ if (len == 0)
391
+ return;
392
+ }
393
+ report_token_list_now(pm, reporter, token_list, beg, len);
394
+ }
395
+
396
+ static void
397
+ report_token_list_rec_closed_pair(pairmatcher_t *pm, VALUE reporter, VALUE token_list, int beg, int len, int closed_pairs_index)
398
+ {
399
+ VALUE pair;
400
+ VALUE around_close, around_open, first, last;
401
+ int first_byteno, last_byteno;
402
+ if (RARRAY_LEN(pm->closed_pairs) <= closed_pairs_index) {
403
+ report_token_list_open_pair(pm, reporter, token_list, beg, len);
404
+ return;
405
+ }
406
+ pair = RARRAY_PTR(pm->closed_pairs)[closed_pairs_index];
407
+ if (pair == Qnil) {
408
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
409
+ return;
410
+ }
411
+ around_close = pair_get_around_close_tokens(pair);
412
+ first = RARRAY_PTR(around_close)[0];
413
+ first_byteno = fragment_byteno(first);
414
+ while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]))
415
+ len--;
416
+ if (len == 0)
417
+ return;
418
+ around_open = pair_get_around_open_tokens(pair);
419
+ first = RARRAY_PTR(around_open)[0];
420
+ first_byteno = fragment_byteno(first);
421
+ last = RARRAY_PTR(around_open)[RARRAY_LEN(around_open)-1];
422
+ last_byteno = fragment_byteno(last);
423
+ if (last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg]) ||
424
+ fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) < first_byteno) {
425
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
426
+ }
427
+ else if (first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg])) {
428
+ while (0 < len && fragment_byteno(RARRAY_PTR(token_list)[beg]) <= last_byteno) {
429
+ beg++;
430
+ len--;
431
+ }
432
+ if (len)
433
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
434
+ }
435
+ else if (fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) <= last_byteno) {
436
+ while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]))
437
+ len--;
438
+ if (len)
439
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
440
+ }
441
+ else {
442
+ int beg1, len1;
443
+ int beg2, len2;
444
+ beg1 = beg;
445
+ len1 = 1;
446
+ while (len1 < len && fragment_byteno(RARRAY_PTR(token_list)[beg1+len1]) < first_byteno)
447
+ len1++;
448
+ beg2 = beg + len - 1;
449
+ len2 = 1;
450
+ while (beg <= beg2-1 && last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg2-1])) {
451
+ beg2--;
452
+ len2++;
453
+ }
454
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg1, len1, closed_pairs_index+1);
455
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg2, len2, closed_pairs_index+1);
456
+ }
457
+ }
458
+
459
+ static void
460
+ report_token_list(pairmatcher_t *pm, VALUE token_list, int beg, int len, VALUE reporter)
461
+ {
462
+ if (len < 0)
463
+ len = RARRAY_LEN(token_list) - beg;
464
+ if (len == 0)
465
+ return;
466
+ if (RARRAY_LEN(pm->recent_tokens) != 0) {
467
+ int first_byteno = fragment_byteno(RARRAY_PTR(pm->recent_tokens)[0]);
468
+ if (first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[0]))
469
+ return;
470
+ while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]))
471
+ len--;
472
+ }
473
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, 0);
474
+ }
475
+
476
+ static void
477
+ discard_unmatched_pair(pairmatcher_t *pm, VALUE reporter)
478
+ {
479
+ VALUE pair = rb_ary_pop(pm->pair_stack);
480
+ report_token_list(pm, pair_get_around_open_tokens(pair), 0, -1, reporter);
481
+ }
482
+
483
+ static void
484
+ put_close_token(pairmatcher_t *pm, VALUE close_token, VALUE pair_def, int depth, VALUE reporter)
485
+ {
486
+ VALUE pair, around_close_tokens;
487
+ int before_close_len;
488
+ while (depth+1 < RARRAY_LEN(pm->pair_stack)) {
489
+ discard_unmatched_pair(pm, reporter);
490
+ }
491
+ pair = rb_ary_pop(pm->pair_stack);
492
+ around_close_tokens = rb_ary_new2(pm->before_close_max+1+pm->after_close_max);
493
+ before_close_len = concat_recent_tokens(pm, pm->before_close_max, around_close_tokens);
494
+ rb_ary_push(around_close_tokens, close_token);
495
+ pair_set_before_close_len(pair, before_close_len);
496
+ pair_set_around_close_tokens(pair, around_close_tokens);
497
+ rb_ary_push(pm->closed_pairs, pair);
498
+ }
499
+
500
+ static void
501
+ add_recent(pairmatcher_t *pm, VALUE reporter, VALUE token)
502
+ {
503
+ int max = pm->before_open_max;
504
+ if (max < pm->before_close_max)
505
+ max = pm->before_close_max;
506
+ if (max == 0)
507
+ return;
508
+ if (RARRAY_LEN(pm->recent_tokens) < max) {
509
+ rb_ary_push(pm->recent_tokens, token);
510
+ }
511
+ else {
512
+ VALUE val = RARRAY_PTR(pm->recent_tokens)[0];
513
+ MEMMOVE(RARRAY_PTR(pm->recent_tokens),
514
+ RARRAY_PTR(pm->recent_tokens)+1,
515
+ VALUE, max-1);
516
+ RARRAY_PTR(pm->recent_tokens)[max-1] = token;
517
+ report_token_list(pm, rb_ary_new3(1, val), 0, -1, reporter);
518
+ }
519
+ }
520
+
521
+ static void
522
+ add_after_open(pairmatcher_t *pm, VALUE token)
523
+ {
524
+ int i;
525
+ int max = pm->after_open_max;
526
+ VALUE pair;
527
+ for (i = RARRAY_LEN(pm->pair_stack)-1; 0 <= i; i--) {
528
+ pair = RARRAY_PTR(pm->pair_stack)[i];
529
+ if (max <= pair_get_after_open_len(pair))
530
+ break;
531
+ rb_ary_push(pair_get_around_open_tokens(pair), token);
532
+ }
533
+ for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
534
+ pair = RARRAY_PTR(pm->closed_pairs)[i];
535
+ if (pair == Qnil)
536
+ continue;
537
+ if (pair_get_after_open_len(pair) < max)
538
+ rb_ary_push(pair_get_around_open_tokens(pair), token);
539
+ }
540
+ }
541
+
542
+ static void
543
+ add_after_close(pairmatcher_t *pm, VALUE token)
544
+ {
545
+ int i;
546
+ int max = pm->after_close_max;
547
+ VALUE pair;
548
+ for (i = RARRAY_LEN(pm->closed_pairs) - 1; 0 <= i; i--) {
549
+ pair = RARRAY_PTR(pm->closed_pairs)[i];
550
+ if (pair == Qnil)
551
+ continue;
552
+ if (max <= pair_get_after_close_len(pair))
553
+ break;
554
+ rb_ary_push(pair_get_around_close_tokens(pair), token);
555
+ }
556
+ }
557
+
558
+ static void
559
+ discard_matched_pair(pairmatcher_t *pm, VALUE pair, VALUE reporter)
560
+ {
561
+ VALUE around_open = pair_get_around_open_tokens(pair);
562
+ VALUE around_close = pair_get_around_close_tokens(pair);
563
+ if (fragment_byteno(RARRAY_PTR(around_close)[0]) <=
564
+ fragment_byteno(RARRAY_PTR(around_open)[RARRAY_LEN(around_open)-1])) {
565
+ int len1 = RARRAY_LEN(around_open);
566
+ while (0 < len1 && fragment_byteno(RARRAY_PTR(around_close)[0]) <= fragment_byteno(RARRAY_PTR(around_open)[len1-1]))
567
+ len1--;
568
+ if (len1) {
569
+ report_token_list(pm, around_open, 0, len1, reporter);
570
+ }
571
+ report_token_list(pm, around_close, 0, -1, reporter);
572
+ }
573
+ else {
574
+ report_token_list(pm, around_open, 0, -1, reporter);
575
+ report_token_list(pm, around_close, 0, -1, reporter);
576
+ }
577
+ }
578
+
579
+ static VALUE
580
+ ary_subseq(VALUE ary, int beg, int len)
581
+ {
582
+ VALUE argv[2];
583
+ argv[0] = INT2NUM(beg);
584
+ argv[1] = INT2NUM(len);
585
+ return rb_ary_aref(2, argv, ary);
586
+ }
587
+
588
+ static void
589
+ yield_pair(VALUE pair)
590
+ {
591
+ rb_yield(pair);
592
+ }
593
+
594
+ static VALUE
595
+ pair_before_open_tokens(VALUE pair)
596
+ {
597
+ int before_len;
598
+ VALUE around_open = pair_get_around_open_tokens(pair);
599
+ before_len = pair_get_before_open_len(pair);
600
+ return ary_subseq(around_open, 0, before_len);
601
+ }
602
+
603
+ static VALUE
604
+ pair_around_open(VALUE pair, VALUE index)
605
+ {
606
+ int before_len;
607
+ int i = NUM2INT(index);
608
+ VALUE around_open = pair_get_around_open_tokens(pair);
609
+ before_len = pair_get_before_open_len(pair);
610
+ if (-before_len <= i && i <= pair_get_after_open_len(pair))
611
+ return rb_ary_entry(around_open, before_len+i);
612
+ else
613
+ return Qnil;
614
+ }
615
+
616
+ static VALUE
617
+ pair_open_token(VALUE pair)
618
+ {
619
+ int before_len;
620
+ VALUE around_open = pair_get_around_open_tokens(pair);
621
+ before_len = pair_get_before_open_len(pair);
622
+ return rb_ary_entry(around_open, before_len);
623
+ }
624
+
625
+ static VALUE
626
+ pair_after_open_tokens(VALUE pair)
627
+ {
628
+ int before_len, after_len;
629
+ VALUE around_open = pair_get_around_open_tokens(pair);
630
+ before_len = pair_get_before_open_len(pair);
631
+ after_len = pair_get_after_open_len(pair);
632
+ return ary_subseq(around_open, before_len+1, after_len);
633
+ return ary_subseq(around_open, 0, before_len);
634
+ }
635
+
636
+ static VALUE
637
+ pair_before_close_tokens(VALUE pair)
638
+ {
639
+ int before_len;
640
+ VALUE around_close = pair_get_around_close_tokens(pair);
641
+ if (around_close == Qnil) return Qnil;
642
+ before_len = pair_get_before_close_len(pair);
643
+ return ary_subseq(around_close, 0, before_len);
644
+ }
645
+
646
+ static VALUE
647
+ pair_around_close(VALUE pair, VALUE index)
648
+ {
649
+ int before_len;
650
+ int i = NUM2INT(index);
651
+ VALUE around_close = pair_get_around_close_tokens(pair);
652
+ if (around_close == Qnil) return Qnil;
653
+ before_len = pair_get_before_close_len(pair);
654
+ if (-before_len <= i && i <= pair_get_after_close_len(pair))
655
+ return rb_ary_entry(around_close, before_len+i);
656
+ else
657
+ return Qnil;
658
+ }
659
+
660
+ static VALUE
661
+ pair_close_token(VALUE pair)
662
+ {
663
+ int before_len;
664
+ VALUE around_close = pair_get_around_close_tokens(pair);
665
+ if (around_close == Qnil) return Qnil;
666
+ before_len = pair_get_before_close_len(pair);
667
+ return rb_ary_entry(around_close, before_len);
668
+ }
669
+
670
+ static VALUE
671
+ pair_after_close_tokens(VALUE pair)
672
+ {
673
+ int before_len, after_len;
674
+ VALUE around_close = pair_get_around_close_tokens(pair);
675
+ if (around_close == Qnil) return Qnil;
676
+ before_len = pair_get_before_close_len(pair);
677
+ after_len = pair_get_after_close_len(pair);
678
+ return ary_subseq(around_close, before_len+1, after_len);
679
+ return ary_subseq(around_close, 0, before_len);
680
+ }
681
+
682
+ static VALUE
683
+ pair_before_open_length(VALUE pair)
684
+ {
685
+ return INT2FIX(pair_get_before_open_len(pair));
686
+ }
687
+
688
+ static VALUE
689
+ pair_after_open_length(VALUE pair)
690
+ {
691
+ return INT2FIX(pair_get_after_open_len(pair));
692
+ }
693
+
694
+ static VALUE
695
+ pair_before_close_length(VALUE pair)
696
+ {
697
+ VALUE around_close = pair_get_around_close_tokens(pair);
698
+ if (around_close == Qnil) return Qnil;
699
+ return INT2FIX(pair_get_before_close_len(pair));
700
+ }
701
+
702
+ static VALUE
703
+ pair_after_close_length(VALUE pair)
704
+ {
705
+ VALUE around_close = pair_get_around_close_tokens(pair);
706
+ if (around_close == Qnil) return Qnil;
707
+ return INT2FIX(pair_get_after_close_len(pair));
708
+ }
709
+
710
+ static VALUE
711
+ pair_pair_type(VALUE pair)
712
+ {
713
+ VALUE def = pair_get_pair_def(pair);
714
+ return RARRAY_PTR(def)[4];
715
+ }
716
+
717
+ static void
718
+ check_closed_pairs(pairmatcher_t *pm, VALUE reporter)
719
+ {
720
+ int i, j;
721
+ int after_open_max = pm->after_open_max;
722
+ int after_close_max = pm->after_close_max;
723
+ VALUE pair;
724
+ for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
725
+ pair = RARRAY_PTR(pm->closed_pairs)[i];
726
+ if (pair == Qnil)
727
+ continue;
728
+ if (pair_get_after_open_len(pair) == after_open_max &&
729
+ pair_get_after_close_len(pair) == after_close_max) {
730
+ RARRAY_PTR(pm->closed_pairs)[i] = Qnil;
731
+ yield_pair(pair);
732
+ discard_matched_pair(pm, pair, reporter);
733
+ }
734
+ }
735
+ j = 0;
736
+ for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
737
+ if (RARRAY_PTR(pm->closed_pairs)[i] != Qnil) {
738
+ RARRAY_PTR(pm->closed_pairs)[j] = RARRAY_PTR(pm->closed_pairs)[i];
739
+ j++;
740
+ }
741
+ }
742
+ while (j < RARRAY_LEN(pm->closed_pairs))
743
+ rb_ary_pop(pm->closed_pairs);
744
+ }
745
+
746
+ static void
747
+ put_token(pairmatcher_t *pairmatcher, VALUE token, VALUE reporter)
748
+ {
749
+ VALUE pair_def, tmp_pair_def;
750
+ int depth, max_depth, i;
751
+ add_after_open(pairmatcher, token);
752
+ add_after_close(pairmatcher, token);
753
+ check_closed_pairs(pairmatcher, reporter);
754
+
755
+ if ((pair_def = open_token_p(pairmatcher, token))) {
756
+ put_open_token(pairmatcher, token, pair_def);
757
+ }
758
+ else {
759
+ i = 0;
760
+ max_depth = 0;
761
+ pair_def = Qfalse;
762
+ while ((tmp_pair_def = close_token_p(pairmatcher, token, &i)) != Qfalse) {
763
+ depth = matching_open_depth(pairmatcher, token, tmp_pair_def);
764
+ if (max_depth <= depth) {
765
+ pair_def = tmp_pair_def;
766
+ max_depth = depth;
767
+ }
768
+ i++;
769
+ }
770
+ if (pair_def != Qfalse) {
771
+ put_close_token(pairmatcher, token, pair_def, max_depth, reporter);
772
+ }
773
+ }
774
+ add_recent(pairmatcher, reporter, token);
775
+ }
776
+
777
+ static int
778
+ intertoken_p(pairmatcher_t *pairmatcher, VALUE token_type)
779
+ {
780
+ int i;
781
+ for (i = 0; i < RARRAY_LEN(pairmatcher->intertoken_defs); i++) {
782
+ VALUE def = RARRAY_PTR(pairmatcher->intertoken_defs)[i];
783
+ VALUE def_type = RARRAY_PTR(def)[0];
784
+ //VALUE def_text = RARRAY_PTR(def)[1];
785
+ if (def_type == token_type) {
786
+ return 1;
787
+ }
788
+ }
789
+ return 0;
790
+ }
791
+
792
+ static void
793
+ finish(pairmatcher_t *pm, VALUE reporter)
794
+ {
795
+ int i;
796
+ VALUE pair;
797
+ for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
798
+ pair = RARRAY_PTR(pm->closed_pairs)[i];
799
+ if (pair == Qnil)
800
+ continue;
801
+ RARRAY_PTR(pm->closed_pairs)[i] = Qnil;
802
+ yield_pair(pair);
803
+ discard_matched_pair(pm, pair, reporter);
804
+ }
805
+ while (RARRAY_LEN(pm->pair_stack)) {
806
+ discard_unmatched_pair(pm, reporter);
807
+ }
808
+ report_token_list_now(pm, reporter, pm->recent_tokens, 0, RARRAY_LEN(pm->recent_tokens));
809
+ }
810
+
811
+ static void
812
+ parse(pairmatcher_t *pm, VALUE tokenizer, VALUE reporter)
813
+ {
814
+ VALUE token_info;
815
+ while ((token_info = get_token(tokenizer)) != Qnil) {
816
+ VALUE token_type, token_text, token_lineno, token_byteno;
817
+ VALUE token;
818
+ Check_Type(token_info, T_ARRAY);
819
+ if (RARRAY_LEN(token_info) != 8) {
820
+ rb_raise(rb_eArgError, "unexpected token");
821
+ }
822
+ token_type = RARRAY_PTR(token_info)[0];
823
+ token_text = RARRAY_PTR(token_info)[1];
824
+ token_lineno = RARRAY_PTR(token_info)[2];
825
+ token_byteno = RARRAY_PTR(token_info)[4];
826
+ token = rb_funcall(Fragment, id_new, 4, token_type, token_text, token_lineno, token_byteno);
827
+ if (intertoken_p(pm, token_type)) {
828
+ rb_funcall(reporter, id_call, 1, token);
829
+ }
830
+ else {
831
+ put_token(pm, token, reporter);
832
+ }
833
+ }
834
+ finish(pm, reporter);
835
+ }
836
+
837
+ static VALUE
838
+ pairmatcher_parse(VALUE self, VALUE tokenizer, VALUE reporter)
839
+ {
840
+ pairmatcher_t *pairmatcher;
841
+ GetPM(self, pairmatcher);
842
+ parse(pairmatcher, tokenizer, reporter);
843
+
844
+ return Qnil;
845
+ }
846
+
847
+ void Init_pairmatcher(void)
848
+ {
849
+ VALUE LangScan = rb_const_get(rb_cObject, rb_intern("LangScan"));
850
+ VALUE PairMatcher = rb_define_class_under(LangScan, "PairMatcher", rb_cData);
851
+ Fragment = rb_const_get(LangScan, rb_intern("Fragment"));
852
+ rb_global_variable(&Fragment);
853
+
854
+ id_get_token = rb_intern("get_token");
855
+ id_new = rb_intern("new");
856
+ id_call = rb_intern("call");
857
+
858
+ rb_define_alloc_func(PairMatcher, pairmatcher_s_allocate);
859
+ rb_define_method(PairMatcher, "initialize", pairmatcher_initialize, 4);
860
+ //rb_define_method(PairMatcher, "initialize_copy", pairmatcher_initialize_copy, 1);
861
+ rb_define_method(PairMatcher, "define_intertoken_fragment", pairmatcher_define_intertoken_fragment, 2);
862
+ rb_define_method(PairMatcher, "define_pair", pairmatcher_define_pair, 5);
863
+ rb_define_method(PairMatcher, "before_open_max", pairmatcher_get_before_open_max, 0);
864
+ rb_define_method(PairMatcher, "after_open_max", pairmatcher_get_after_open_max, 0);
865
+ rb_define_method(PairMatcher, "before_close_max", pairmatcher_get_before_close_max, 0);
866
+ rb_define_method(PairMatcher, "after_close_max", pairmatcher_get_after_close_max, 0);
867
+ rb_define_method(PairMatcher, "parse", pairmatcher_parse, 2);
868
+
869
+ Pair = rb_struct_define("LangScanPair",
870
+ "pair_def",
871
+ "before_open_len",
872
+ "around_open_tokens",
873
+ "before_close_len",
874
+ "around_close_tokens",
875
+ "outer",
876
+ NULL);
877
+ rb_define_method(Pair, "before_open_tokens", pair_before_open_tokens, 0);
878
+ rb_define_method(Pair, "before_open_length", pair_before_open_length, 0);
879
+ rb_define_method(Pair, "around_open", pair_around_open, 1);
880
+ rb_define_method(Pair, "open_token", pair_open_token, 0);
881
+ rb_define_method(Pair, "after_open_tokens", pair_after_open_tokens, 0);
882
+ rb_define_method(Pair, "after_open_length", pair_after_open_length, 0);
883
+ rb_define_method(Pair, "before_close_tokens", pair_before_close_tokens, 0);
884
+ rb_define_method(Pair, "before_close_length", pair_before_close_length, 0);
885
+ rb_define_method(Pair, "around_close", pair_around_close, 1);
886
+ rb_define_method(Pair, "close_token", pair_close_token, 0);
887
+ rb_define_method(Pair, "after_close_tokens", pair_after_close_tokens, 0);
888
+ rb_define_method(Pair, "after_close_length", pair_after_close_length, 0);
889
+ rb_define_method(Pair, "pair_type", pair_pair_type, 0);
890
+ }