langscan 1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. data/AUTHORS.txt +19 -0
  2. data/History.txt +126 -0
  3. data/Manifest.txt +167 -0
  4. data/README.rdoc +89 -0
  5. data/Rakefile +40 -0
  6. data/ext/langscan/_make_c.rb +20 -0
  7. data/ext/langscan/_make_h.rb +30 -0
  8. data/ext/langscan/_template.c +134 -0
  9. data/ext/langscan/_template.h +53 -0
  10. data/ext/langscan/c/c/Makefile +157 -0
  11. data/ext/langscan/c/c/c.c +134 -0
  12. data/ext/langscan/c/c/c.h +66 -0
  13. data/ext/langscan/c/c/ctok.c +4622 -0
  14. data/ext/langscan/c/c/ctok.l +212 -0
  15. data/ext/langscan/c/c/extconf.rb +3 -0
  16. data/ext/langscan/c/c/modulename.txt +1 -0
  17. data/ext/langscan/c/c/tokenlist.txt +13 -0
  18. data/ext/langscan/csharp/csharp/Makefile +157 -0
  19. data/ext/langscan/csharp/csharp/csharp.c +134 -0
  20. data/ext/langscan/csharp/csharp/csharp.h +65 -0
  21. data/ext/langscan/csharp/csharp/csharptok.c +2965 -0
  22. data/ext/langscan/csharp/csharp/csharptok.l +200 -0
  23. data/ext/langscan/csharp/csharp/extconf.rb +3 -0
  24. data/ext/langscan/csharp/csharp/modulename.txt +1 -0
  25. data/ext/langscan/csharp/csharp/tokenlist.txt +12 -0
  26. data/ext/langscan/d/d/Makefile +157 -0
  27. data/ext/langscan/d/d/d.c +134 -0
  28. data/ext/langscan/d/d/d.h +64 -0
  29. data/ext/langscan/d/d/dtok.c +5461 -0
  30. data/ext/langscan/d/d/dtok.l +282 -0
  31. data/ext/langscan/d/d/extconf.rb +3 -0
  32. data/ext/langscan/d/d/modulename.txt +1 -0
  33. data/ext/langscan/d/d/tokenlist.txt +11 -0
  34. data/ext/langscan/elisp/elisp/Makefile +157 -0
  35. data/ext/langscan/elisp/elisp/elisp.c +134 -0
  36. data/ext/langscan/elisp/elisp/elisp.h +62 -0
  37. data/ext/langscan/elisp/elisp/elisptok.c +2101 -0
  38. data/ext/langscan/elisp/elisp/elisptok.l +151 -0
  39. data/ext/langscan/elisp/elisp/extconf.rb +3 -0
  40. data/ext/langscan/elisp/elisp/modulename.txt +1 -0
  41. data/ext/langscan/elisp/elisp/tokenlist.txt +9 -0
  42. data/ext/langscan/java/java/Makefile +157 -0
  43. data/ext/langscan/java/java/extconf.rb +3 -0
  44. data/ext/langscan/java/java/java.c +134 -0
  45. data/ext/langscan/java/java/java.h +64 -0
  46. data/ext/langscan/java/java/javatok.c +2090 -0
  47. data/ext/langscan/java/java/javatok.l +155 -0
  48. data/ext/langscan/java/java/modulename.txt +1 -0
  49. data/ext/langscan/java/java/tokenlist.txt +11 -0
  50. data/ext/langscan/javascript/javascript/Makefile +157 -0
  51. data/ext/langscan/javascript/javascript/extconf.rb +3 -0
  52. data/ext/langscan/javascript/javascript/javascript.c +134 -0
  53. data/ext/langscan/javascript/javascript/javascript.h +63 -0
  54. data/ext/langscan/javascript/javascript/javascripttok.c +2051 -0
  55. data/ext/langscan/javascript/javascript/javascripttok.l +147 -0
  56. data/ext/langscan/javascript/javascript/modulename.txt +1 -0
  57. data/ext/langscan/javascript/javascript/tokenlist.txt +10 -0
  58. data/ext/langscan/pairmatcher/pairmatcher/Makefile +157 -0
  59. data/ext/langscan/pairmatcher/pairmatcher/extconf.rb +3 -0
  60. data/ext/langscan/pairmatcher/pairmatcher/pairmatcher.c +890 -0
  61. data/ext/langscan/php/php/Makefile +157 -0
  62. data/ext/langscan/php/php/extconf.rb +3 -0
  63. data/ext/langscan/php/php/modulename.txt +1 -0
  64. data/ext/langscan/php/php/php.c +134 -0
  65. data/ext/langscan/php/php/php.h +64 -0
  66. data/ext/langscan/php/php/phptok.c +2406 -0
  67. data/ext/langscan/php/php/phptok.l +212 -0
  68. data/ext/langscan/php/php/tokenlist.txt +11 -0
  69. data/ext/langscan/post-distclean.rb +21 -0
  70. data/ext/langscan/pre-config.rb +57 -0
  71. data/ext/langscan/python/python/Makefile +157 -0
  72. data/ext/langscan/python/python/extconf.rb +3 -0
  73. data/ext/langscan/python/python/modulename.txt +1 -0
  74. data/ext/langscan/python/python/python.c +134 -0
  75. data/ext/langscan/python/python/python.h +61 -0
  76. data/ext/langscan/python/python/pythontok.c +2102 -0
  77. data/ext/langscan/python/python/pythontok.l +155 -0
  78. data/ext/langscan/python/python/tokenlist.txt +8 -0
  79. data/ext/langscan/ruby/compat/ripper/Makefile +158 -0
  80. data/ext/langscan/ruby/compat/ripper/depend +1 -0
  81. data/ext/langscan/ruby/compat/ripper/extconf.rb +4 -0
  82. data/ext/langscan/ruby/compat/ripper/include/eventids1.c +251 -0
  83. data/ext/langscan/ruby/compat/ripper/include/eventids2.c +277 -0
  84. data/ext/langscan/ruby/compat/ripper/include/lex.c +138 -0
  85. data/ext/langscan/ruby/compat/ripper/ripper.c +14420 -0
  86. data/ext/langscan/scheme/scheme/Makefile +157 -0
  87. data/ext/langscan/scheme/scheme/extconf.rb +3 -0
  88. data/ext/langscan/scheme/scheme/modulename.txt +1 -0
  89. data/ext/langscan/scheme/scheme/scheme.c +134 -0
  90. data/ext/langscan/scheme/scheme/scheme.h +60 -0
  91. data/ext/langscan/scheme/scheme/schemetok.c +2447 -0
  92. data/ext/langscan/scheme/scheme/schemetok.l +177 -0
  93. data/ext/langscan/scheme/scheme/tokenlist.txt +7 -0
  94. data/ext/langscan/sh/sh/Makefile +157 -0
  95. data/ext/langscan/sh/sh/extconf.rb +3 -0
  96. data/ext/langscan/sh/sh/modulename.txt +1 -0
  97. data/ext/langscan/sh/sh/sh.c +134 -0
  98. data/ext/langscan/sh/sh/sh.h +61 -0
  99. data/ext/langscan/sh/sh/shtok.c +2470 -0
  100. data/ext/langscan/sh/sh/shtok.l +325 -0
  101. data/ext/langscan/sh/sh/tokenlist.txt +8 -0
  102. data/lib/langscan.rb +124 -0
  103. data/lib/langscan/_common.rb +50 -0
  104. data/lib/langscan/_easyscanner.rb +78 -0
  105. data/lib/langscan/_pairmatcher.rb +46 -0
  106. data/lib/langscan/_type.rb +125 -0
  107. data/lib/langscan/autoconf.rb +51 -0
  108. data/lib/langscan/automake.rb +51 -0
  109. data/lib/langscan/brainfuck.rb +48 -0
  110. data/lib/langscan/c.rb +144 -0
  111. data/lib/langscan/csharp.rb +101 -0
  112. data/lib/langscan/css.rb +109 -0
  113. data/lib/langscan/d.rb +201 -0
  114. data/lib/langscan/eiffel.rb +167 -0
  115. data/lib/langscan/elisp.rb +132 -0
  116. data/lib/langscan/io.rb +84 -0
  117. data/lib/langscan/java.rb +95 -0
  118. data/lib/langscan/javascript.rb +97 -0
  119. data/lib/langscan/lua.rb +116 -0
  120. data/lib/langscan/ocaml.rb +298 -0
  121. data/lib/langscan/ocaml/camlexer.ml +28 -0
  122. data/lib/langscan/ocaml/lexer.mll +230 -0
  123. data/lib/langscan/ocaml/types.ml +36 -0
  124. data/lib/langscan/perl.rb +87 -0
  125. data/lib/langscan/perl/tokenizer.pl +231 -0
  126. data/lib/langscan/php.rb +80 -0
  127. data/lib/langscan/python.rb +101 -0
  128. data/lib/langscan/rpmspec.rb +71 -0
  129. data/lib/langscan/ruby.rb +164 -0
  130. data/lib/langscan/ruby/compat/README +5 -0
  131. data/lib/langscan/ruby/compat/ripper.rb +4 -0
  132. data/lib/langscan/ruby/compat/ripper/core.rb +918 -0
  133. data/lib/langscan/ruby/compat/ripper/filter.rb +70 -0
  134. data/lib/langscan/ruby/compat/ripper/lexer.rb +179 -0
  135. data/lib/langscan/ruby/compat/ripper/sexp.rb +100 -0
  136. data/lib/langscan/scheme.rb +160 -0
  137. data/lib/langscan/sh.rb +116 -0
  138. data/lib/langscan/text.rb +37 -0
  139. data/metaconfig +2 -0
  140. data/script/console +10 -0
  141. data/script/destroy +14 -0
  142. data/script/generate +14 -0
  143. data/script/makemanifest.rb +21 -0
  144. data/setup.rb +1604 -0
  145. data/tasks/extconf.rake +13 -0
  146. data/tasks/extconf/langscan.rake +42 -0
  147. data/test/langscan/brainfuck/test/test_scan.rb +55 -0
  148. data/test/langscan/c/test/test_scan.rb +216 -0
  149. data/test/langscan/c/test/test_token.rb +41 -0
  150. data/test/langscan/csharp/test/test_scan.rb +157 -0
  151. data/test/langscan/css/test/test_css.rb +79 -0
  152. data/test/langscan/d/test/test_scan.rb +233 -0
  153. data/test/langscan/d/test/test_token.rb +205 -0
  154. data/test/langscan/eiffel/test/test_eiffel.rb +95 -0
  155. data/test/langscan/elisp/test/test_elisp.rb +177 -0
  156. data/test/langscan/io/test/test_io.rb +79 -0
  157. data/test/langscan/java/test/test_java.rb +74 -0
  158. data/test/langscan/javascript/test/test_javascript.rb +39 -0
  159. data/test/langscan/lua/test/test_lua.rb +69 -0
  160. data/test/langscan/ocaml/test/test_ocaml.rb +161 -0
  161. data/test/langscan/php/test/test_scan.rb +138 -0
  162. data/test/langscan/python/test/test_scan.rb +105 -0
  163. data/test/langscan/rpmspec/test/test_rpmspec.rb +51 -0
  164. data/test/langscan/ruby/test/test_scan.rb +71 -0
  165. data/test/langscan/scheme/test/test_scan.rb +198 -0
  166. data/test/test_helper.rb +7 -0
  167. data/test/test_langscan.rb +123 -0
  168. metadata +296 -0
@@ -0,0 +1,147 @@
1
+ /*
2
+ * javascript.l - a lex rule for JavaScript
3
+ *
4
+ * Copyright (C) 2005 Keisuke Nishida <knishida@open-cobol.org>
5
+ * All rights reserved.
6
+ * This is free software with ABSOLUTELY NO WARRANTY.
7
+ *
8
+ * You can redistribute it and/or modify it under the terms of
9
+ * the GNU General Public License version 2.
10
+ */
11
+
12
+ %option reentrant
13
+ %option prefix="langscan_javascript_lex_"
14
+ %option noyywrap
15
+ %option nodefault
16
+
17
+ slash \/
18
+ star \*
19
+ nonstar [^\*]
20
+ nonslashstar [^\/\*]
21
+ commentcontent {star}+{nonslashstar}{nonstar}*
22
+ comment {slash}{star}{nonstar}*{commentcontent}*{star}+{slash}
23
+
24
+ %{
25
+
26
+ #include "javascript.h"
27
+
28
+ #define YY_EXTRA_TYPE langscan_javascript_lex_extra_t *
29
+
30
+ #if YY_NULL != 0
31
+ #error "YY_NULL is not 0."
32
+ #endif
33
+
34
+ #define YY_DECL langscan_javascript_token_t langscan_javascript_lex_lex(yyscan_t yyscanner)
35
+
36
/*
 * Refill the scanner buffer through the user-supplied read callback.
 * A read that yields 0 bytes latches yyextra->eof.  Once latched, the
 * callback is not invoked again and `result` is explicitly set to 0
 * (YY_NULL is checked to be 0 above), so the scanner never observes a
 * stale or unassigned byte count.
 */
#define YY_INPUT(buf,result,max_size) \
  if (!yyextra->eof) { \
    result = yyextra->user_read(&(yyextra->user_data), (buf), (max_size)); \
    if (result == 0) \
      yyextra->eof = 1; \
  } \
  else { \
    result = 0; \
  }
42
+
43
+ #define UPD update_pos(yyextra, yytext, yyleng)
44
+ static void update_pos(langscan_javascript_lex_extra_t *, char *, int);
45
+
46
+ #define report(token) \
47
+ do { \
48
+ yyextra->text = yytext; \
49
+ yyextra->leng = yyleng; \
50
+ return langscan_javascript_##token; \
51
+ } while (0)
52
+
53
+ %}
54
+
55
+ %%
56
+ [ \t\f\r]+ { UPD; report(space); }
57
+ \n { UPD; report(space); }
58
+ "//".* { UPD; report(comment); }
59
+ {comment} { UPD; report(comment); }
60
+ \"([^\\\"]|\\.)*\" { UPD; report(string); }
61
+ [A-Za-z_][0-9A-Za-z_]* { UPD; report(ident); }
62
+ . { UPD; report(punct); }
63
+
64
+ %%
65
+
66
+ static void update_pos(
67
+ langscan_javascript_lex_extra_t *extra,
68
+ char *text,
69
+ int leng)
70
+ {
71
+ int i, j;
72
+ extra->beg_byteno = extra->end_byteno;
73
+ extra->beg_lineno = extra->end_lineno;
74
+ extra->beg_columnno = extra->end_columnno;
75
+ j = 0;
76
+ for (i = 0; i < leng; i++) {
77
+ if (text[i] == '\n') {
78
+ extra->end_lineno++;
79
+ j = i + 1;
80
+ extra->end_columnno = 0;
81
+ }
82
+ }
83
+ extra->end_columnno += leng - j;
84
+ extra->end_byteno += leng;
85
+ }
86
+
87
+ langscan_javascript_tokenizer_t *langscan_javascript_make_tokenizer(
88
+ size_t (*user_read)(void **user_data_p, char *buf, size_t maxlen),
89
+ void *user_data)
90
+ {
91
+ langscan_javascript_tokenizer_t *tokenizer;
92
+ langscan_javascript_lex_extra_t *extra;
93
+ tokenizer = (langscan_javascript_tokenizer_t *)malloc(sizeof(langscan_javascript_tokenizer_t));
94
+ if (tokenizer == NULL)
95
+ return NULL;
96
+ extra = (langscan_javascript_lex_extra_t *)malloc(sizeof(langscan_javascript_lex_extra_t));
97
+ if (extra == NULL)
98
+ return NULL;
99
+ extra->user_read = user_read;
100
+ extra->user_data = user_data;
101
+ extra->beg_lineno = 1;
102
+ extra->beg_columnno = 0;
103
+ extra->beg_byteno = 0;
104
+ extra->end_lineno = 1;
105
+ extra->end_columnno = 0;
106
+ extra->end_byteno = 0;
107
+ extra->eof = 0;
108
+ tokenizer->extra = extra;
109
+ langscan_javascript_lex_lex_init(&tokenizer->scanner);
110
+ langscan_javascript_lex_set_extra(extra, tokenizer->scanner);
111
+ return tokenizer;
112
+ }
113
+
114
+ langscan_javascript_token_t langscan_javascript_get_token(langscan_javascript_tokenizer_t *tokenizer)
115
+ {
116
+ return langscan_javascript_lex_lex(tokenizer->scanner);
117
+ }
118
+
119
+ void langscan_javascript_free_tokenizer(langscan_javascript_tokenizer_t *tokenizer)
120
+ {
121
+ langscan_javascript_lex_extra_t *extra = langscan_javascript_lex_get_extra(tokenizer->scanner);
122
+ free((void *)extra);
123
+ langscan_javascript_lex_lex_destroy(tokenizer->scanner);
124
+ free((void *)tokenizer);
125
+ }
126
+
127
+ user_read_t langscan_javascript_tokenizer_get_user_read(langscan_javascript_tokenizer_t *tokenizer)
128
+ {
129
+ return tokenizer->extra->user_read;
130
+ }
131
+
132
+ void *langscan_javascript_tokenizer_get_user_data(langscan_javascript_tokenizer_t *tokenizer)
133
+ {
134
+ return tokenizer->extra->user_data;
135
+ }
136
+
137
+ const char *langscan_javascript_token_name(langscan_javascript_token_t token)
138
+ {
139
+ static char *token_names[] = {
140
+ "*eof*",
141
+ #define LANGSCAN_JAVASCRIPT_TOKEN(name) #name,
142
+ LANGSCAN_JAVASCRIPT_TOKEN_LIST
143
+ #undef LANGSCAN_JAVASCRIPT_TOKEN
144
+ };
145
+
146
+ return token_names[token];
147
+ }
@@ -0,0 +1 @@
1
+ JavaScript
@@ -0,0 +1,10 @@
1
+ preproc_beg
2
+ preproc_end
3
+ character
4
+ integer
5
+ floating
6
+ string
7
+ ident
8
+ punct
9
+ comment
10
+ space
@@ -0,0 +1,157 @@
1
+
2
+ SHELL = /bin/sh
3
+
4
+ #### Start of system configuration section. ####
5
+
6
+ srcdir = /Users/osuka/devel/git/langscan/ext/langscan/pairmatcher/pairmatcher
7
+ topdir = /System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin10.0
8
+ hdrdir = $(topdir)
9
+ VPATH = $(srcdir):$(topdir):$(hdrdir)
10
+ exec_prefix = $(prefix)
11
+ prefix = $(DESTDIR)/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr
12
+ sharedstatedir = $(prefix)/com
13
+ mandir = $(DESTDIR)/usr/share/man
14
+ psdir = $(docdir)
15
+ oldincludedir = $(DESTDIR)/usr/include
16
+ localedir = $(datarootdir)/locale
17
+ bindir = $(exec_prefix)/bin
18
+ libexecdir = $(exec_prefix)/libexec
19
+ sitedir = $(DESTDIR)/Library/Ruby/Site
20
+ htmldir = $(docdir)
21
+ vendorarchdir = $(vendorlibdir)/$(sitearch)
22
+ includedir = $(prefix)/include
23
+ infodir = $(DESTDIR)/usr/share/info
24
+ vendorlibdir = $(vendordir)/$(ruby_version)
25
+ sysconfdir = $(prefix)/etc
26
+ libdir = $(exec_prefix)/lib
27
+ sbindir = $(exec_prefix)/sbin
28
+ rubylibdir = $(libdir)/ruby/$(ruby_version)
29
+ docdir = $(datarootdir)/doc/$(PACKAGE)
30
+ dvidir = $(docdir)
31
+ vendordir = $(libdir)/ruby/vendor_ruby
32
+ datarootdir = $(prefix)/share
33
+ pdfdir = $(docdir)
34
+ archdir = $(rubylibdir)/$(arch)
35
+ sitearchdir = $(sitelibdir)/$(sitearch)
36
+ datadir = $(datarootdir)
37
+ localstatedir = $(prefix)/var
38
+ sitelibdir = $(sitedir)/$(ruby_version)
39
+
40
+ CC = gcc
41
+ LIBRUBY = $(LIBRUBY_SO)
42
+ LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
43
+ LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
44
+ LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)
45
+
46
+ RUBY_EXTCONF_H =
47
+ CFLAGS = -fno-common -arch i386 -arch x86_64 -g -Os -pipe -fno-common -DENABLE_DTRACE -fno-common -pipe -fno-common $(cflags)
48
+ INCFLAGS = -I. -I$(topdir) -I$(hdrdir) -I$(srcdir)
49
+ DEFS =
50
+ CPPFLAGS = -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE $(DEFS) $(cppflags)
51
+ CXXFLAGS = $(CFLAGS)
52
+ ldflags = -L. -arch i386 -arch x86_64
53
+ dldflags =
54
+ archflag =
55
+ DLDFLAGS = $(ldflags) $(dldflags) $(archflag)
56
+ LDSHARED = cc -arch i386 -arch x86_64 -pipe -bundle -undefined dynamic_lookup
57
+ AR = ar
58
+ EXEEXT =
59
+
60
+ RUBY_INSTALL_NAME = ruby
61
+ RUBY_SO_NAME = ruby
62
+ arch = universal-darwin10.0
63
+ sitearch = universal-darwin10.0
64
+ ruby_version = 1.8
65
+ ruby = /System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/bin/ruby
66
+ RUBY = $(ruby)
67
+ RM = rm -f
68
+ MAKEDIRS = mkdir -p
69
+ INSTALL = /usr/bin/install -c
70
+ INSTALL_PROG = $(INSTALL) -m 0755
71
+ INSTALL_DATA = $(INSTALL) -m 644
72
+ COPY = cp
73
+
74
+ #### End of system configuration section. ####
75
+
76
+ preload =
77
+
78
+ libpath = . $(libdir)
79
+ LIBPATH = -L. -L$(libdir)
80
+ DEFFILE =
81
+
82
+ CLEANFILES = mkmf.log
83
+ DISTCLEANFILES =
84
+
85
+ extout =
86
+ extout_prefix =
87
+ target_prefix = /langscan/pairmatcher
88
+ LOCAL_LIBS =
89
+ LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl
90
+ SRCS = pairmatcher.c
91
+ OBJS = pairmatcher.o
92
+ TARGET = pairmatcher
93
+ DLLIB = $(TARGET).bundle
94
+ EXTSTATIC =
95
+ STATIC_LIB =
96
+
97
+ BINDIR = $(bindir)
98
+ RUBYCOMMONDIR = $(sitedir)$(target_prefix)
99
+ RUBYLIBDIR = $(sitelibdir)$(target_prefix)
100
+ RUBYARCHDIR = $(sitearchdir)$(target_prefix)
101
+
102
+ TARGET_SO = $(DLLIB)
103
+ CLEANLIBS = $(TARGET).bundle $(TARGET).il? $(TARGET).tds $(TARGET).map
104
+ CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
105
+
106
+ all: $(DLLIB)
107
+ static: $(STATIC_LIB)
108
+
109
+ clean:
110
+ @-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
111
+
112
+ distclean: clean
113
+ @-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
114
+ @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
115
+
116
+ realclean: distclean
117
+ install: install-so install-rb
118
+
119
+ install-so: $(RUBYARCHDIR)
120
+ install-so: $(RUBYARCHDIR)/$(DLLIB)
121
+ $(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
122
+ $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
123
+ install-rb: pre-install-rb install-rb-default
124
+ install-rb-default: pre-install-rb-default
125
+ pre-install-rb: Makefile
126
+ pre-install-rb-default: Makefile
127
+ $(RUBYARCHDIR):
128
+ $(MAKEDIRS) $@
129
+
130
+ site-install: site-install-so site-install-rb
131
+ site-install-so: install-so
132
+ site-install-rb: install-rb
133
+
134
+ .SUFFIXES: .c .m .cc .cxx .cpp .C .o
135
+
136
+ .cc.o:
137
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
138
+
139
+ .cxx.o:
140
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
141
+
142
+ .cpp.o:
143
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
144
+
145
+ .C.o:
146
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
147
+
148
+ .c.o:
149
+ $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $<
150
+
151
+ $(DLLIB): $(OBJS) Makefile
152
+ @-$(RM) $@
153
+ $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
154
+
155
+
156
+
157
+ $(OBJS): ruby.h defines.h
@@ -0,0 +1,3 @@
1
+ require 'mkmf'
2
+
3
+ create_makefile('langscan/pairmatcher/pairmatcher')
@@ -0,0 +1,890 @@
1
+ /*
2
+ * pairmatcher.c - a pair matching parser
3
+ *
4
+ * Copyright (C) 2005 Akira Tanaka <akr@m17n.org>
5
+ * All rights reserved.
6
+ * This is free software with ABSOLUTELY NO WARRANTY.
7
+ *
8
+ * You can redistribute it and/or modify it under the terms of
9
+ * the GNU General Public License version 2.
10
+ */
11
+
12
+ #include <ruby.h>
13
+
14
+ static ID id_get_token, id_new, id_call;
15
+ static VALUE Fragment;
16
+
17
+ #ifndef RSTRUCT_PTR
18
+ # define RSTRUCT_PTR(st) (RSTRUCT(st)->ptr)
19
+ #endif
20
+ #ifndef RSTRUCT_LEN
21
+ # define RSTRUCT_LEN(st) (RSTRUCT(st)->len)
22
+ #endif
23
+
24
+ #ifndef RARRAY_PTR
25
+ # define RARRAY_PTR(str) (RARRAY(str)->ptr)
26
+ #endif
27
+ #ifndef RARRAY_LEN
28
+ # define RARRAY_LEN(str) (RARRAY(str)->len)
29
+ #endif
30
+
31
+ #ifdef SYMBOL_P
32
+ # define Check_Symbol(val) do { if (!SYMBOL_P(val)) { Check_Type(val, T_SYMBOL); } } while (0)
33
+ #else
34
+ # define Check_Symbol(val) Check_Type(val, T_SYMBOL)
35
+ #endif
36
+
37
+ static VALUE
38
+ fragment_type(VALUE fragment)
39
+ {
40
+ VALUE val;
41
+ Check_Type(fragment, T_STRUCT);
42
+ if (RBASIC(fragment)->klass != Fragment) {
43
+ rb_raise(rb_eTypeError, "not fragment");
44
+ }
45
+ val = RSTRUCT_PTR(fragment)[0];
46
+ Check_Symbol(val);
47
+ return val;
48
+ }
49
+
50
+ static VALUE
51
+ fragment_text(VALUE fragment)
52
+ {
53
+ VALUE val;
54
+ Check_Type(fragment, T_STRUCT);
55
+ if (RBASIC(fragment)->klass != Fragment) {
56
+ rb_raise(rb_eTypeError, "not fragment");
57
+ }
58
+ val = RSTRUCT_PTR(fragment)[1];
59
+ StringValue(val);
60
+ return val;
61
+ }
62
+
63
+ static int
64
+ fragment_byteno(VALUE fragment)
65
+ {
66
+ VALUE val;
67
+ Check_Type(fragment, T_STRUCT);
68
+ if (RBASIC(fragment)->klass != Fragment) {
69
+ rb_raise(rb_eTypeError, "not fragment");
70
+ }
71
+ val = RSTRUCT_PTR(fragment)[3];
72
+ return NUM2INT(val);
73
+ }
74
+
75
+ typedef struct {
76
+ unsigned char before_open_max;
77
+ unsigned char after_open_max;
78
+ unsigned char before_close_max;
79
+ unsigned char after_close_max;
80
+ VALUE pair_defs;
81
+ VALUE intertoken_defs;
82
+ VALUE recent_tokens;
83
+ VALUE pair_stack;
84
+ VALUE closed_pairs;
85
+ } pairmatcher_t;
86
+
87
+ static void pairmatcher_mark(pairmatcher_t *pairmatcher)
88
+ {
89
+ if (pairmatcher == NULL)
90
+ return;
91
+ rb_gc_mark(pairmatcher->pair_defs);
92
+ rb_gc_mark(pairmatcher->intertoken_defs);
93
+ rb_gc_mark(pairmatcher->recent_tokens);
94
+ rb_gc_mark(pairmatcher->pair_stack);
95
+ rb_gc_mark(pairmatcher->closed_pairs);
96
+ }
97
+
98
+ static void pairmatcher_free(pairmatcher_t *pairmatcher)
99
+ {
100
+ if (pairmatcher == NULL)
101
+ return;
102
+ free((void *)pairmatcher);
103
+ }
104
+
105
+ static VALUE pairmatcher_s_allocate(VALUE klass)
106
+ {
107
+ return Data_Wrap_Struct(klass, pairmatcher_mark, pairmatcher_free, NULL);
108
+ }
109
+
110
+ static VALUE pairmatcher_initialize(
111
+ VALUE self,
112
+ VALUE before_open_max,
113
+ VALUE after_open_max,
114
+ VALUE before_close_max,
115
+ VALUE after_close_max)
116
+ {
117
+ pairmatcher_t *pairmatcher;
118
+
119
+ Data_Get_Struct(self, pairmatcher_t, pairmatcher);
120
+ if (pairmatcher != NULL) { rb_raise(rb_eArgError, "called twice"); }
121
+
122
+ pairmatcher = ALLOC(pairmatcher_t);
123
+ pairmatcher->pair_defs = Qnil;
124
+ pairmatcher->intertoken_defs = Qnil;
125
+ pairmatcher->recent_tokens = Qnil;
126
+ pairmatcher->pair_stack = Qnil;
127
+ pairmatcher->closed_pairs = Qnil;
128
+ DATA_PTR(self) = pairmatcher;
129
+
130
+ pairmatcher->before_open_max = NUM2INT(before_open_max);
131
+ pairmatcher->after_open_max = NUM2INT(after_open_max);
132
+ pairmatcher->before_close_max = NUM2INT(before_close_max);
133
+ pairmatcher->after_close_max = NUM2INT(after_close_max);
134
+ pairmatcher->pair_defs = rb_ary_new();
135
+ //RBASIC(pairmatcher->pair_defs)->klass = 0;
136
+ pairmatcher->intertoken_defs = rb_ary_new();
137
+ //RBASIC(pairmatcher->intertoken_defs)->klass = 0;
138
+ pairmatcher->recent_tokens = rb_ary_new();
139
+ //RBASIC(pairmatcher->recent_tokens)->klass = 0;
140
+ pairmatcher->pair_stack = rb_ary_new();
141
+ //RBASIC(pairmatcher->pair_stack)->klass = 0;
142
+ pairmatcher->closed_pairs = rb_ary_new();
143
+ //RBASIC(pairmatcher->closed_pairs)->klass = 0;
144
+ return self;
145
+ }
146
+
147
+ #define GetPM(obj, var) \
148
+ do { \
149
+ Data_Get_Struct((obj), pairmatcher_t, (var)); \
150
+ if ((var) == NULL) { rb_raise(rb_eArgError, "not initialized"); } \
151
+ } while(0)
152
+
153
+ static VALUE
154
+ pairmatcher_get_before_open_max(VALUE self)
155
+ {
156
+ pairmatcher_t *pairmatcher;
157
+ GetPM(self, pairmatcher);
158
+ if (pairmatcher == NULL) { rb_raise(rb_eArgError, "not initialized"); }
159
+ return INT2NUM(pairmatcher->before_open_max);
160
+ }
161
+
162
+ static VALUE
163
+ pairmatcher_get_after_open_max(VALUE self)
164
+ {
165
+ pairmatcher_t *pairmatcher;
166
+ GetPM(self, pairmatcher);
167
+ return INT2NUM(pairmatcher->after_open_max);
168
+ }
169
+
170
+ static VALUE
171
+ pairmatcher_get_before_close_max(VALUE self)
172
+ {
173
+ pairmatcher_t *pairmatcher;
174
+ GetPM(self, pairmatcher);
175
+ return INT2NUM(pairmatcher->before_close_max);
176
+ }
177
+
178
+ static VALUE
179
+ pairmatcher_get_after_close_max(VALUE self)
180
+ {
181
+ pairmatcher_t *pairmatcher;
182
+ GetPM(self, pairmatcher);
183
+ return INT2NUM(pairmatcher->after_close_max);
184
+ }
185
+
186
+ static VALUE
187
+ pairmatcher_define_pair(VALUE self, VALUE pair_type, VALUE open_type, VALUE open_text, VALUE close_type, VALUE close_text)
188
+ {
189
+ pairmatcher_t *pairmatcher;
190
+ VALUE def;
191
+ Check_Symbol(open_type);
192
+ if (open_text != Qnil) {
193
+ StringValue(open_text);
194
+ open_text = rb_str_new4(open_text);
195
+ }
196
+ Check_Symbol(close_type);
197
+ if (close_text != Qnil) {
198
+ StringValue(close_text);
199
+ close_text = rb_str_new4(close_text);
200
+ }
201
+
202
+ def = rb_ary_new3(5, open_type, open_text, close_type, close_text, pair_type);
203
+ //RBASIC(def)->klass = 0;
204
+
205
+ GetPM(self, pairmatcher);
206
+ rb_ary_push(pairmatcher->pair_defs, def);
207
+
208
+ return Qnil;
209
+ }
210
+
211
+ static VALUE
212
+ pairmatcher_define_intertoken_fragment(VALUE self, VALUE type, VALUE text)
213
+ {
214
+ pairmatcher_t *pairmatcher;
215
+ VALUE def;
216
+ Check_Symbol(type);
217
+ if (text != Qnil) {
218
+ StringValue(text);
219
+ text = rb_str_new4(text);
220
+ }
221
+
222
+ def = rb_ary_new3(2, type, text);
223
+ //RBASIC(def)->klass = 0;
224
+
225
+ GetPM(self, pairmatcher);
226
+ rb_ary_push(pairmatcher->intertoken_defs, def);
227
+
228
+ return Qnil;
229
+ }
230
+
231
+ static VALUE
232
+ get_token(VALUE tokenizer)
233
+ {
234
+ return rb_funcall(tokenizer, id_get_token, 0);
235
+ }
236
+
237
+ static VALUE
238
+ open_token_p(pairmatcher_t *pairmatcher, VALUE token)
239
+ {
240
+ int i;
241
+ VALUE type = fragment_type(token);
242
+ VALUE text = fragment_text(token);
243
+ for (i = 0; i < RARRAY_LEN(pairmatcher->pair_defs); i++) {
244
+ VALUE def = RARRAY_PTR(pairmatcher->pair_defs)[i];
245
+ VALUE open_type = RARRAY_PTR(def)[0];
246
+ VALUE open_text = RARRAY_PTR(def)[1];
247
+ if (open_type == Qnil || open_type == type) {
248
+ if (open_text == Qnil || rb_str_cmp(open_text, text) == 0) {
249
+ return def;
250
+ }
251
+ }
252
+ }
253
+ return Qfalse;
254
+ }
255
+
256
+ static VALUE
257
+ close_token_p(pairmatcher_t *pairmatcher, VALUE token, int *i)
258
+ {
259
+ VALUE type = fragment_type(token);
260
+ VALUE text = fragment_text(token);
261
+ for (; *i < RARRAY_LEN(pairmatcher->pair_defs); (*i)++) {
262
+ VALUE def = RARRAY_PTR(pairmatcher->pair_defs)[*i];
263
+ VALUE close_type = RARRAY_PTR(def)[2];
264
+ VALUE close_text = RARRAY_PTR(def)[3];
265
+ if (close_type == Qnil || close_type == type) {
266
+ if (close_text == Qnil || rb_str_cmp(close_text, text) == 0) {
267
+ return def;
268
+ }
269
+ }
270
+ }
271
+ return Qfalse;
272
+ }
273
+
274
+ #define pair_get_pair_def(pair) (RSTRUCT_PTR(pair)[0])
275
+ #define pair_get_before_open_len(pair) FIX2INT(RSTRUCT_PTR(pair)[1])
276
+ #define pair_get_around_open_tokens(pair) (RSTRUCT_PTR(pair)[2])
277
+ #define pair_get_before_close_len(pair) FIX2INT(RSTRUCT_PTR(pair)[3])
278
+ #define pair_get_around_close_tokens(pair) (RSTRUCT_PTR(pair)[4])
279
+ #define pair_get_outer(pair) (RSTRUCT_PTR(pair)[5])
280
+ #define pair_set_pair_def(pair, val) (RSTRUCT_PTR(pair)[0] = (val))
281
+ #define pair_set_before_open_len(pair, len) (RSTRUCT_PTR(pair)[1] = INT2FIX(len))
282
+ #define pair_set_around_open_tokens(pair, val) (RSTRUCT_PTR(pair)[2] = (val))
283
+ #define pair_set_before_close_len(pair, len) (RSTRUCT_PTR(pair)[3] = INT2FIX(len))
284
+ #define pair_set_around_close_tokens(pair, val) (RSTRUCT_PTR(pair)[4] = (val))
285
+
286
+ #define pair_get_after_open_len(pair) (RARRAY_LEN(pair_get_around_open_tokens(pair))-pair_get_before_open_len(pair)-1)
287
+ #define pair_get_after_close_len(pair) (RARRAY_LEN(pair_get_around_close_tokens(pair))-pair_get_before_close_len(pair)-1)
288
+
289
+ static VALUE Pair;
290
+
291
+ static VALUE
292
+ make_pair(VALUE pair_def, int before_open_len, VALUE around_open_tokens, VALUE outer)
293
+ {
294
+ VALUE pair = rb_struct_new(Pair,
295
+ pair_def,
296
+ INT2FIX(before_open_len),
297
+ around_open_tokens,
298
+ Qnil,
299
+ Qnil,
300
+ outer);
301
+ return pair;
302
+ }
303
+
304
+ static int
305
+ concat_recent_tokens(pairmatcher_t *pm, int max, VALUE ary)
306
+ {
307
+ int i;
308
+ if (RARRAY_LEN(pm->recent_tokens) <= max)
309
+ max = RARRAY_LEN(pm->recent_tokens);
310
+ for (i = 0; i < max; i++) {
311
+ rb_ary_push(ary, RARRAY_PTR(pm->recent_tokens)[RARRAY_LEN(pm->recent_tokens)-max+i]);
312
+ }
313
+ return max;
314
+ }
315
+
316
+ static void
317
+ put_open_token(pairmatcher_t *pm, VALUE open_token, VALUE pair_def)
318
+ {
319
+ int before_open_len, stack_len;
320
+ VALUE pair;
321
+ VALUE around_open_tokens;
322
+ around_open_tokens = rb_ary_new2(pm->before_open_max+1+pm->after_open_max);
323
+ before_open_len = concat_recent_tokens(pm, pm->before_open_max, around_open_tokens);
324
+ rb_ary_push(around_open_tokens, open_token);
325
+ stack_len = RARRAY_LEN(pm->pair_stack);
326
+ pair = make_pair(pair_def, before_open_len, around_open_tokens,
327
+ stack_len ? RARRAY_PTR(pm->pair_stack)[stack_len-1] : Qnil);
328
+ rb_ary_push(pm->pair_stack, pair);
329
+ }
330
+
331
+ static int
332
+ matching_open_depth(pairmatcher_t *pm, VALUE open_token, VALUE pair_def)
333
+ {
334
+ int i;
335
+ for (i = RARRAY_LEN(pm->pair_stack) - 1; 0 <= i; i--) {
336
+ if (pair_get_pair_def(RARRAY_PTR(pm->pair_stack)[i]) == pair_def) {
337
+ return i;
338
+ }
339
+ }
340
+ return -1;
341
+ }
342
+
343
+ static void
344
+ report_token_list_now(pairmatcher_t *pm, VALUE reporter, VALUE token_list, int beg, int len)
345
+ {
346
+ while (len) {
347
+ rb_funcall(reporter, id_call, 1, RARRAY_PTR(token_list)[beg]);
348
+ beg++;
349
+ len--;
350
+ }
351
+ }
352
+
353
+ static void
354
+ report_token_list_open_pair(pairmatcher_t *pm, VALUE reporter, VALUE token_list, int beg, int len)
355
+ {
356
+ int i;
357
+ for (i = RARRAY_LEN(pm->pair_stack) - 1; 0 <= i; i--) {
358
+ VALUE pair = RARRAY_PTR(pm->pair_stack)[i];
359
+ VALUE around_open = pair_get_around_open_tokens(pair);
360
+ int first_byteno = fragment_byteno(RARRAY_PTR(around_open)[0]);
361
+ int last_byteno = fragment_byteno(RARRAY_PTR(around_open)[RARRAY_LEN(around_open)-1]);
362
+ if (last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg])) {
363
+ report_token_list_now(pm, reporter, token_list, beg, len);
364
+ return;
365
+ }
366
+ /* last_byteno >= fragment_byteno(RARRAY_PTR(token_list)[beg]) */
367
+ if (fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) < first_byteno)
368
+ continue;
369
+ /*
370
+ * fragment_byteno(RARRAY_PTR(token_list)[beg]) <= last_byteno
371
+ * first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1])
372
+ */
373
+ if (last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg+len-1])) {
374
+ int beg2 = beg+len-1;
375
+ int len2 = 1;
376
+ while (beg <= beg2 && last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg2-1])) {
377
+ beg2--;
378
+ len2++;
379
+ }
380
+ report_token_list_now(pm, reporter, token_list, beg2, len2);
381
+ len -= len2;
382
+ }
383
+ /*
384
+ * first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) <= last_byteno
385
+ * fragment_byteno(RARRAY_PTR(token_list)[beg]) <= last_byteno
386
+ */
387
+ while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1])) {
388
+ len--;
389
+ }
390
+ if (len == 0)
391
+ return;
392
+ }
393
+ report_token_list_now(pm, reporter, token_list, beg, len);
394
+ }
395
+
396
+ static void
397
+ report_token_list_rec_closed_pair(pairmatcher_t *pm, VALUE reporter, VALUE token_list, int beg, int len, int closed_pairs_index)
398
+ {
399
+ VALUE pair;
400
+ VALUE around_close, around_open, first, last;
401
+ int first_byteno, last_byteno;
402
+ if (RARRAY_LEN(pm->closed_pairs) <= closed_pairs_index) {
403
+ report_token_list_open_pair(pm, reporter, token_list, beg, len);
404
+ return;
405
+ }
406
+ pair = RARRAY_PTR(pm->closed_pairs)[closed_pairs_index];
407
+ if (pair == Qnil) {
408
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
409
+ return;
410
+ }
411
+ around_close = pair_get_around_close_tokens(pair);
412
+ first = RARRAY_PTR(around_close)[0];
413
+ first_byteno = fragment_byteno(first);
414
+ while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]))
415
+ len--;
416
+ if (len == 0)
417
+ return;
418
+ around_open = pair_get_around_open_tokens(pair);
419
+ first = RARRAY_PTR(around_open)[0];
420
+ first_byteno = fragment_byteno(first);
421
+ last = RARRAY_PTR(around_open)[RARRAY_LEN(around_open)-1];
422
+ last_byteno = fragment_byteno(last);
423
+ if (last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg]) ||
424
+ fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) < first_byteno) {
425
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
426
+ }
427
+ else if (first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg])) {
428
+ while (0 < len && fragment_byteno(RARRAY_PTR(token_list)[beg]) <= last_byteno) {
429
+ beg++;
430
+ len--;
431
+ }
432
+ if (len)
433
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
434
+ }
435
+ else if (fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) <= last_byteno) {
436
+ while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]))
437
+ len--;
438
+ if (len)
439
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
440
+ }
441
+ else {
442
+ int beg1, len1;
443
+ int beg2, len2;
444
+ beg1 = beg;
445
+ len1 = 1;
446
+ while (len1 < len && fragment_byteno(RARRAY_PTR(token_list)[beg1+len1]) < first_byteno)
447
+ len1++;
448
+ beg2 = beg + len - 1;
449
+ len2 = 1;
450
+ while (beg <= beg2-1 && last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg2-1])) {
451
+ beg2--;
452
+ len2++;
453
+ }
454
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg1, len1, closed_pairs_index+1);
455
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg2, len2, closed_pairs_index+1);
456
+ }
457
+ }
458
+
459
+ static void
460
+ report_token_list(pairmatcher_t *pm, VALUE token_list, int beg, int len, VALUE reporter)
461
+ {
462
+ if (len < 0)
463
+ len = RARRAY_LEN(token_list) - beg;
464
+ if (len == 0)
465
+ return;
466
+ if (RARRAY_LEN(pm->recent_tokens) != 0) {
467
+ int first_byteno = fragment_byteno(RARRAY_PTR(pm->recent_tokens)[0]);
468
+ if (first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[0]))
469
+ return;
470
+ while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]))
471
+ len--;
472
+ }
473
+ report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, 0);
474
+ }
475
+
476
+ static void
477
+ discard_unmatched_pair(pairmatcher_t *pm, VALUE reporter)
478
+ {
479
+ VALUE pair = rb_ary_pop(pm->pair_stack);
480
+ report_token_list(pm, pair_get_around_open_tokens(pair), 0, -1, reporter);
481
+ }
482
+
483
+ static void
484
+ put_close_token(pairmatcher_t *pm, VALUE close_token, VALUE pair_def, int depth, VALUE reporter)
485
+ {
486
+ VALUE pair, around_close_tokens;
487
+ int before_close_len;
488
+ while (depth+1 < RARRAY_LEN(pm->pair_stack)) {
489
+ discard_unmatched_pair(pm, reporter);
490
+ }
491
+ pair = rb_ary_pop(pm->pair_stack);
492
+ around_close_tokens = rb_ary_new2(pm->before_close_max+1+pm->after_close_max);
493
+ before_close_len = concat_recent_tokens(pm, pm->before_close_max, around_close_tokens);
494
+ rb_ary_push(around_close_tokens, close_token);
495
+ pair_set_before_close_len(pair, before_close_len);
496
+ pair_set_around_close_tokens(pair, around_close_tokens);
497
+ rb_ary_push(pm->closed_pairs, pair);
498
+ }
499
+
500
+ static void
501
+ add_recent(pairmatcher_t *pm, VALUE reporter, VALUE token)
502
+ {
503
+ int max = pm->before_open_max;
504
+ if (max < pm->before_close_max)
505
+ max = pm->before_close_max;
506
+ if (max == 0)
507
+ return;
508
+ if (RARRAY_LEN(pm->recent_tokens) < max) {
509
+ rb_ary_push(pm->recent_tokens, token);
510
+ }
511
+ else {
512
+ VALUE val = RARRAY_PTR(pm->recent_tokens)[0];
513
+ MEMMOVE(RARRAY_PTR(pm->recent_tokens),
514
+ RARRAY_PTR(pm->recent_tokens)+1,
515
+ VALUE, max-1);
516
+ RARRAY_PTR(pm->recent_tokens)[max-1] = token;
517
+ report_token_list(pm, rb_ary_new3(1, val), 0, -1, reporter);
518
+ }
519
+ }
520
+
521
+ static void
522
+ add_after_open(pairmatcher_t *pm, VALUE token)
523
+ {
524
+ int i;
525
+ int max = pm->after_open_max;
526
+ VALUE pair;
527
+ for (i = RARRAY_LEN(pm->pair_stack)-1; 0 <= i; i--) {
528
+ pair = RARRAY_PTR(pm->pair_stack)[i];
529
+ if (max <= pair_get_after_open_len(pair))
530
+ break;
531
+ rb_ary_push(pair_get_around_open_tokens(pair), token);
532
+ }
533
+ for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
534
+ pair = RARRAY_PTR(pm->closed_pairs)[i];
535
+ if (pair == Qnil)
536
+ continue;
537
+ if (pair_get_after_open_len(pair) < max)
538
+ rb_ary_push(pair_get_around_open_tokens(pair), token);
539
+ }
540
+ }
541
+
542
+ static void
543
+ add_after_close(pairmatcher_t *pm, VALUE token)
544
+ {
545
+ int i;
546
+ int max = pm->after_close_max;
547
+ VALUE pair;
548
+ for (i = RARRAY_LEN(pm->closed_pairs) - 1; 0 <= i; i--) {
549
+ pair = RARRAY_PTR(pm->closed_pairs)[i];
550
+ if (pair == Qnil)
551
+ continue;
552
+ if (max <= pair_get_after_close_len(pair))
553
+ break;
554
+ rb_ary_push(pair_get_around_close_tokens(pair), token);
555
+ }
556
+ }
557
+
558
+ static void
559
+ discard_matched_pair(pairmatcher_t *pm, VALUE pair, VALUE reporter)
560
+ {
561
+ VALUE around_open = pair_get_around_open_tokens(pair);
562
+ VALUE around_close = pair_get_around_close_tokens(pair);
563
+ if (fragment_byteno(RARRAY_PTR(around_close)[0]) <=
564
+ fragment_byteno(RARRAY_PTR(around_open)[RARRAY_LEN(around_open)-1])) {
565
+ int len1 = RARRAY_LEN(around_open);
566
+ while (0 < len1 && fragment_byteno(RARRAY_PTR(around_close)[0]) <= fragment_byteno(RARRAY_PTR(around_open)[len1-1]))
567
+ len1--;
568
+ if (len1) {
569
+ report_token_list(pm, around_open, 0, len1, reporter);
570
+ }
571
+ report_token_list(pm, around_close, 0, -1, reporter);
572
+ }
573
+ else {
574
+ report_token_list(pm, around_open, 0, -1, reporter);
575
+ report_token_list(pm, around_close, 0, -1, reporter);
576
+ }
577
+ }
578
+
579
+ static VALUE
580
+ ary_subseq(VALUE ary, int beg, int len)
581
+ {
582
+ VALUE argv[2];
583
+ argv[0] = INT2NUM(beg);
584
+ argv[1] = INT2NUM(len);
585
+ return rb_ary_aref(2, argv, ary);
586
+ }
587
+
588
+ static void
589
+ yield_pair(VALUE pair)
590
+ {
591
+ rb_yield(pair);
592
+ }
593
+
594
+ static VALUE
595
+ pair_before_open_tokens(VALUE pair)
596
+ {
597
+ int before_len;
598
+ VALUE around_open = pair_get_around_open_tokens(pair);
599
+ before_len = pair_get_before_open_len(pair);
600
+ return ary_subseq(around_open, 0, before_len);
601
+ }
602
+
603
+ static VALUE
604
+ pair_around_open(VALUE pair, VALUE index)
605
+ {
606
+ int before_len;
607
+ int i = NUM2INT(index);
608
+ VALUE around_open = pair_get_around_open_tokens(pair);
609
+ before_len = pair_get_before_open_len(pair);
610
+ if (-before_len <= i && i <= pair_get_after_open_len(pair))
611
+ return rb_ary_entry(around_open, before_len+i);
612
+ else
613
+ return Qnil;
614
+ }
615
+
616
+ static VALUE
617
+ pair_open_token(VALUE pair)
618
+ {
619
+ int before_len;
620
+ VALUE around_open = pair_get_around_open_tokens(pair);
621
+ before_len = pair_get_before_open_len(pair);
622
+ return rb_ary_entry(around_open, before_len);
623
+ }
624
+
625
+ static VALUE
626
+ pair_after_open_tokens(VALUE pair)
627
+ {
628
+ int before_len, after_len;
629
+ VALUE around_open = pair_get_around_open_tokens(pair);
630
+ before_len = pair_get_before_open_len(pair);
631
+ after_len = pair_get_after_open_len(pair);
632
+ return ary_subseq(around_open, before_len+1, after_len);
633
+ return ary_subseq(around_open, 0, before_len);
634
+ }
635
+
636
+ static VALUE
637
+ pair_before_close_tokens(VALUE pair)
638
+ {
639
+ int before_len;
640
+ VALUE around_close = pair_get_around_close_tokens(pair);
641
+ if (around_close == Qnil) return Qnil;
642
+ before_len = pair_get_before_close_len(pair);
643
+ return ary_subseq(around_close, 0, before_len);
644
+ }
645
+
646
+ static VALUE
647
+ pair_around_close(VALUE pair, VALUE index)
648
+ {
649
+ int before_len;
650
+ int i = NUM2INT(index);
651
+ VALUE around_close = pair_get_around_close_tokens(pair);
652
+ if (around_close == Qnil) return Qnil;
653
+ before_len = pair_get_before_close_len(pair);
654
+ if (-before_len <= i && i <= pair_get_after_close_len(pair))
655
+ return rb_ary_entry(around_close, before_len+i);
656
+ else
657
+ return Qnil;
658
+ }
659
+
660
+ static VALUE
661
+ pair_close_token(VALUE pair)
662
+ {
663
+ int before_len;
664
+ VALUE around_close = pair_get_around_close_tokens(pair);
665
+ if (around_close == Qnil) return Qnil;
666
+ before_len = pair_get_before_close_len(pair);
667
+ return rb_ary_entry(around_close, before_len);
668
+ }
669
+
670
+ static VALUE
671
+ pair_after_close_tokens(VALUE pair)
672
+ {
673
+ int before_len, after_len;
674
+ VALUE around_close = pair_get_around_close_tokens(pair);
675
+ if (around_close == Qnil) return Qnil;
676
+ before_len = pair_get_before_close_len(pair);
677
+ after_len = pair_get_after_close_len(pair);
678
+ return ary_subseq(around_close, before_len+1, after_len);
679
+ return ary_subseq(around_close, 0, before_len);
680
+ }
681
+
682
+ static VALUE
683
+ pair_before_open_length(VALUE pair)
684
+ {
685
+ return INT2FIX(pair_get_before_open_len(pair));
686
+ }
687
+
688
+ static VALUE
689
+ pair_after_open_length(VALUE pair)
690
+ {
691
+ return INT2FIX(pair_get_after_open_len(pair));
692
+ }
693
+
694
+ static VALUE
695
+ pair_before_close_length(VALUE pair)
696
+ {
697
+ VALUE around_close = pair_get_around_close_tokens(pair);
698
+ if (around_close == Qnil) return Qnil;
699
+ return INT2FIX(pair_get_before_close_len(pair));
700
+ }
701
+
702
+ static VALUE
703
+ pair_after_close_length(VALUE pair)
704
+ {
705
+ VALUE around_close = pair_get_around_close_tokens(pair);
706
+ if (around_close == Qnil) return Qnil;
707
+ return INT2FIX(pair_get_after_close_len(pair));
708
+ }
709
+
710
+ static VALUE
711
+ pair_pair_type(VALUE pair)
712
+ {
713
+ VALUE def = pair_get_pair_def(pair);
714
+ return RARRAY_PTR(def)[4];
715
+ }
716
+
717
+ static void
718
+ check_closed_pairs(pairmatcher_t *pm, VALUE reporter)
719
+ {
720
+ int i, j;
721
+ int after_open_max = pm->after_open_max;
722
+ int after_close_max = pm->after_close_max;
723
+ VALUE pair;
724
+ for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
725
+ pair = RARRAY_PTR(pm->closed_pairs)[i];
726
+ if (pair == Qnil)
727
+ continue;
728
+ if (pair_get_after_open_len(pair) == after_open_max &&
729
+ pair_get_after_close_len(pair) == after_close_max) {
730
+ RARRAY_PTR(pm->closed_pairs)[i] = Qnil;
731
+ yield_pair(pair);
732
+ discard_matched_pair(pm, pair, reporter);
733
+ }
734
+ }
735
+ j = 0;
736
+ for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
737
+ if (RARRAY_PTR(pm->closed_pairs)[i] != Qnil) {
738
+ RARRAY_PTR(pm->closed_pairs)[j] = RARRAY_PTR(pm->closed_pairs)[i];
739
+ j++;
740
+ }
741
+ }
742
+ while (j < RARRAY_LEN(pm->closed_pairs))
743
+ rb_ary_pop(pm->closed_pairs);
744
+ }
745
+
746
+ static void
747
+ put_token(pairmatcher_t *pairmatcher, VALUE token, VALUE reporter)
748
+ {
749
+ VALUE pair_def, tmp_pair_def;
750
+ int depth, max_depth, i;
751
+ add_after_open(pairmatcher, token);
752
+ add_after_close(pairmatcher, token);
753
+ check_closed_pairs(pairmatcher, reporter);
754
+
755
+ if ((pair_def = open_token_p(pairmatcher, token))) {
756
+ put_open_token(pairmatcher, token, pair_def);
757
+ }
758
+ else {
759
+ i = 0;
760
+ max_depth = 0;
761
+ pair_def = Qfalse;
762
+ while ((tmp_pair_def = close_token_p(pairmatcher, token, &i)) != Qfalse) {
763
+ depth = matching_open_depth(pairmatcher, token, tmp_pair_def);
764
+ if (max_depth <= depth) {
765
+ pair_def = tmp_pair_def;
766
+ max_depth = depth;
767
+ }
768
+ i++;
769
+ }
770
+ if (pair_def != Qfalse) {
771
+ put_close_token(pairmatcher, token, pair_def, max_depth, reporter);
772
+ }
773
+ }
774
+ add_recent(pairmatcher, reporter, token);
775
+ }
776
+
777
+ static int
778
+ intertoken_p(pairmatcher_t *pairmatcher, VALUE token_type)
779
+ {
780
+ int i;
781
+ for (i = 0; i < RARRAY_LEN(pairmatcher->intertoken_defs); i++) {
782
+ VALUE def = RARRAY_PTR(pairmatcher->intertoken_defs)[i];
783
+ VALUE def_type = RARRAY_PTR(def)[0];
784
+ //VALUE def_text = RARRAY_PTR(def)[1];
785
+ if (def_type == token_type) {
786
+ return 1;
787
+ }
788
+ }
789
+ return 0;
790
+ }
791
+
792
+ static void
793
+ finish(pairmatcher_t *pm, VALUE reporter)
794
+ {
795
+ int i;
796
+ VALUE pair;
797
+ for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
798
+ pair = RARRAY_PTR(pm->closed_pairs)[i];
799
+ if (pair == Qnil)
800
+ continue;
801
+ RARRAY_PTR(pm->closed_pairs)[i] = Qnil;
802
+ yield_pair(pair);
803
+ discard_matched_pair(pm, pair, reporter);
804
+ }
805
+ while (RARRAY_LEN(pm->pair_stack)) {
806
+ discard_unmatched_pair(pm, reporter);
807
+ }
808
+ report_token_list_now(pm, reporter, pm->recent_tokens, 0, RARRAY_LEN(pm->recent_tokens));
809
+ }
810
+
811
+ static void
812
+ parse(pairmatcher_t *pm, VALUE tokenizer, VALUE reporter)
813
+ {
814
+ VALUE token_info;
815
+ while ((token_info = get_token(tokenizer)) != Qnil) {
816
+ VALUE token_type, token_text, token_lineno, token_byteno;
817
+ VALUE token;
818
+ Check_Type(token_info, T_ARRAY);
819
+ if (RARRAY_LEN(token_info) != 8) {
820
+ rb_raise(rb_eArgError, "unexpected token");
821
+ }
822
+ token_type = RARRAY_PTR(token_info)[0];
823
+ token_text = RARRAY_PTR(token_info)[1];
824
+ token_lineno = RARRAY_PTR(token_info)[2];
825
+ token_byteno = RARRAY_PTR(token_info)[4];
826
+ token = rb_funcall(Fragment, id_new, 4, token_type, token_text, token_lineno, token_byteno);
827
+ if (intertoken_p(pm, token_type)) {
828
+ rb_funcall(reporter, id_call, 1, token);
829
+ }
830
+ else {
831
+ put_token(pm, token, reporter);
832
+ }
833
+ }
834
+ finish(pm, reporter);
835
+ }
836
+
837
+ static VALUE
838
+ pairmatcher_parse(VALUE self, VALUE tokenizer, VALUE reporter)
839
+ {
840
+ pairmatcher_t *pairmatcher;
841
+ GetPM(self, pairmatcher);
842
+ parse(pairmatcher, tokenizer, reporter);
843
+
844
+ return Qnil;
845
+ }
846
+
847
+ void Init_pairmatcher(void)
848
+ {
849
+ VALUE LangScan = rb_const_get(rb_cObject, rb_intern("LangScan"));
850
+ VALUE PairMatcher = rb_define_class_under(LangScan, "PairMatcher", rb_cData);
851
+ Fragment = rb_const_get(LangScan, rb_intern("Fragment"));
852
+ rb_global_variable(&Fragment);
853
+
854
+ id_get_token = rb_intern("get_token");
855
+ id_new = rb_intern("new");
856
+ id_call = rb_intern("call");
857
+
858
+ rb_define_alloc_func(PairMatcher, pairmatcher_s_allocate);
859
+ rb_define_method(PairMatcher, "initialize", pairmatcher_initialize, 4);
860
+ //rb_define_method(PairMatcher, "initialize_copy", pairmatcher_initialize_copy, 1);
861
+ rb_define_method(PairMatcher, "define_intertoken_fragment", pairmatcher_define_intertoken_fragment, 2);
862
+ rb_define_method(PairMatcher, "define_pair", pairmatcher_define_pair, 5);
863
+ rb_define_method(PairMatcher, "before_open_max", pairmatcher_get_before_open_max, 0);
864
+ rb_define_method(PairMatcher, "after_open_max", pairmatcher_get_after_open_max, 0);
865
+ rb_define_method(PairMatcher, "before_close_max", pairmatcher_get_before_close_max, 0);
866
+ rb_define_method(PairMatcher, "after_close_max", pairmatcher_get_after_close_max, 0);
867
+ rb_define_method(PairMatcher, "parse", pairmatcher_parse, 2);
868
+
869
+ Pair = rb_struct_define("LangScanPair",
870
+ "pair_def",
871
+ "before_open_len",
872
+ "around_open_tokens",
873
+ "before_close_len",
874
+ "around_close_tokens",
875
+ "outer",
876
+ NULL);
877
+ rb_define_method(Pair, "before_open_tokens", pair_before_open_tokens, 0);
878
+ rb_define_method(Pair, "before_open_length", pair_before_open_length, 0);
879
+ rb_define_method(Pair, "around_open", pair_around_open, 1);
880
+ rb_define_method(Pair, "open_token", pair_open_token, 0);
881
+ rb_define_method(Pair, "after_open_tokens", pair_after_open_tokens, 0);
882
+ rb_define_method(Pair, "after_open_length", pair_after_open_length, 0);
883
+ rb_define_method(Pair, "before_close_tokens", pair_before_close_tokens, 0);
884
+ rb_define_method(Pair, "before_close_length", pair_before_close_length, 0);
885
+ rb_define_method(Pair, "around_close", pair_around_close, 1);
886
+ rb_define_method(Pair, "close_token", pair_close_token, 0);
887
+ rb_define_method(Pair, "after_close_tokens", pair_after_close_tokens, 0);
888
+ rb_define_method(Pair, "after_close_length", pair_after_close_length, 0);
889
+ rb_define_method(Pair, "pair_type", pair_pair_type, 0);
890
+ }