langscan 1.2-x86-mswin32-60
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS.txt +19 -0
- data/History.txt +126 -0
- data/Manifest.txt +167 -0
- data/README.rdoc +91 -0
- data/Rakefile +40 -0
- data/ext/langscan/_make_c.rb +20 -0
- data/ext/langscan/_make_h.rb +30 -0
- data/ext/langscan/_template.c +134 -0
- data/ext/langscan/_template.h +53 -0
- data/ext/langscan/c/c/Makefile +188 -0
- data/ext/langscan/c/c/c.c +134 -0
- data/ext/langscan/c/c/c.h +66 -0
- data/ext/langscan/c/c/ctok.c +4629 -0
- data/ext/langscan/c/c/ctok.l +212 -0
- data/ext/langscan/c/c/extconf.rb +3 -0
- data/ext/langscan/c/c/modulename.txt +1 -0
- data/ext/langscan/c/c/tokenlist.txt +13 -0
- data/ext/langscan/csharp/csharp/Makefile +188 -0
- data/ext/langscan/csharp/csharp/csharp.c +134 -0
- data/ext/langscan/csharp/csharp/csharp.h +65 -0
- data/ext/langscan/csharp/csharp/csharptok.c +2971 -0
- data/ext/langscan/csharp/csharp/csharptok.l +200 -0
- data/ext/langscan/csharp/csharp/extconf.rb +3 -0
- data/ext/langscan/csharp/csharp/modulename.txt +1 -0
- data/ext/langscan/csharp/csharp/tokenlist.txt +12 -0
- data/ext/langscan/d/d/Makefile +188 -0
- data/ext/langscan/d/d/d.c +134 -0
- data/ext/langscan/d/d/d.h +64 -0
- data/ext/langscan/d/d/dtok.c +5468 -0
- data/ext/langscan/d/d/dtok.l +282 -0
- data/ext/langscan/d/d/extconf.rb +3 -0
- data/ext/langscan/d/d/modulename.txt +1 -0
- data/ext/langscan/d/d/tokenlist.txt +11 -0
- data/ext/langscan/elisp/elisp/Makefile +188 -0
- data/ext/langscan/elisp/elisp/elisp.c +134 -0
- data/ext/langscan/elisp/elisp/elisp.h +62 -0
- data/ext/langscan/elisp/elisp/elisptok.c +2108 -0
- data/ext/langscan/elisp/elisp/elisptok.l +151 -0
- data/ext/langscan/elisp/elisp/extconf.rb +3 -0
- data/ext/langscan/elisp/elisp/modulename.txt +1 -0
- data/ext/langscan/elisp/elisp/tokenlist.txt +9 -0
- data/ext/langscan/java/java/Makefile +188 -0
- data/ext/langscan/java/java/extconf.rb +3 -0
- data/ext/langscan/java/java/java.c +134 -0
- data/ext/langscan/java/java/java.h +64 -0
- data/ext/langscan/java/java/javatok.c +2097 -0
- data/ext/langscan/java/java/javatok.l +155 -0
- data/ext/langscan/java/java/modulename.txt +1 -0
- data/ext/langscan/java/java/tokenlist.txt +11 -0
- data/ext/langscan/javascript/javascript/Makefile +188 -0
- data/ext/langscan/javascript/javascript/extconf.rb +3 -0
- data/ext/langscan/javascript/javascript/javascript.c +134 -0
- data/ext/langscan/javascript/javascript/javascript.h +63 -0
- data/ext/langscan/javascript/javascript/javascripttok.c +2058 -0
- data/ext/langscan/javascript/javascript/javascripttok.l +147 -0
- data/ext/langscan/javascript/javascript/modulename.txt +1 -0
- data/ext/langscan/javascript/javascript/tokenlist.txt +10 -0
- data/ext/langscan/pairmatcher/pairmatcher/Makefile +188 -0
- data/ext/langscan/pairmatcher/pairmatcher/extconf.rb +3 -0
- data/ext/langscan/pairmatcher/pairmatcher/pairmatcher.c +890 -0
- data/ext/langscan/php/php/Makefile +188 -0
- data/ext/langscan/php/php/extconf.rb +3 -0
- data/ext/langscan/php/php/modulename.txt +1 -0
- data/ext/langscan/php/php/php.c +134 -0
- data/ext/langscan/php/php/php.h +64 -0
- data/ext/langscan/php/php/phptok.c +2413 -0
- data/ext/langscan/php/php/phptok.l +212 -0
- data/ext/langscan/php/php/tokenlist.txt +11 -0
- data/ext/langscan/post-distclean.rb +21 -0
- data/ext/langscan/pre-config.rb +57 -0
- data/ext/langscan/python/python/Makefile +188 -0
- data/ext/langscan/python/python/extconf.rb +3 -0
- data/ext/langscan/python/python/modulename.txt +1 -0
- data/ext/langscan/python/python/python.c +134 -0
- data/ext/langscan/python/python/python.h +61 -0
- data/ext/langscan/python/python/pythontok.c +2109 -0
- data/ext/langscan/python/python/pythontok.l +155 -0
- data/ext/langscan/python/python/tokenlist.txt +8 -0
- data/ext/langscan/ruby/compat/ripper/Makefile +189 -0
- data/ext/langscan/ruby/compat/ripper/depend +1 -0
- data/ext/langscan/ruby/compat/ripper/extconf.rb +4 -0
- data/ext/langscan/ruby/compat/ripper/include/eventids1.c +251 -0
- data/ext/langscan/ruby/compat/ripper/include/eventids2.c +277 -0
- data/ext/langscan/ruby/compat/ripper/include/lex.c +138 -0
- data/ext/langscan/ruby/compat/ripper/ripper.c +14420 -0
- data/ext/langscan/scheme/scheme/Makefile +188 -0
- data/ext/langscan/scheme/scheme/extconf.rb +3 -0
- data/ext/langscan/scheme/scheme/modulename.txt +1 -0
- data/ext/langscan/scheme/scheme/scheme.c +134 -0
- data/ext/langscan/scheme/scheme/scheme.h +60 -0
- data/ext/langscan/scheme/scheme/schemetok.c +2454 -0
- data/ext/langscan/scheme/scheme/schemetok.l +177 -0
- data/ext/langscan/scheme/scheme/tokenlist.txt +7 -0
- data/ext/langscan/sh/sh/Makefile +188 -0
- data/ext/langscan/sh/sh/extconf.rb +3 -0
- data/ext/langscan/sh/sh/modulename.txt +1 -0
- data/ext/langscan/sh/sh/sh.c +134 -0
- data/ext/langscan/sh/sh/sh.h +61 -0
- data/ext/langscan/sh/sh/shtok.c +2477 -0
- data/ext/langscan/sh/sh/shtok.l +325 -0
- data/ext/langscan/sh/sh/tokenlist.txt +8 -0
- data/lib/langscan.rb +124 -0
- data/lib/langscan/_common.rb +50 -0
- data/lib/langscan/_easyscanner.rb +78 -0
- data/lib/langscan/_pairmatcher.rb +46 -0
- data/lib/langscan/_type.rb +125 -0
- data/lib/langscan/autoconf.rb +51 -0
- data/lib/langscan/automake.rb +51 -0
- data/lib/langscan/brainfuck.rb +48 -0
- data/lib/langscan/c.rb +144 -0
- data/lib/langscan/c/c.so +0 -0
- data/lib/langscan/csharp.rb +101 -0
- data/lib/langscan/csharp/csharp.so +0 -0
- data/lib/langscan/css.rb +109 -0
- data/lib/langscan/d.rb +201 -0
- data/lib/langscan/d/d.so +0 -0
- data/lib/langscan/eiffel.rb +167 -0
- data/lib/langscan/elisp.rb +132 -0
- data/lib/langscan/elisp/elisp.so +0 -0
- data/lib/langscan/io.rb +84 -0
- data/lib/langscan/java.rb +95 -0
- data/lib/langscan/java/java.so +0 -0
- data/lib/langscan/javascript.rb +97 -0
- data/lib/langscan/javascript/javascript.so +0 -0
- data/lib/langscan/lua.rb +116 -0
- data/lib/langscan/ocaml.rb +298 -0
- data/lib/langscan/ocaml/camlexer.ml +28 -0
- data/lib/langscan/ocaml/lexer.mll +230 -0
- data/lib/langscan/ocaml/types.ml +36 -0
- data/lib/langscan/pairmatcher/pairmatcher.so +0 -0
- data/lib/langscan/perl.rb +87 -0
- data/lib/langscan/perl/tokenizer.pl +231 -0
- data/lib/langscan/php.rb +80 -0
- data/lib/langscan/php/php.so +0 -0
- data/lib/langscan/python.rb +101 -0
- data/lib/langscan/python/python.so +0 -0
- data/lib/langscan/rpmspec.rb +71 -0
- data/lib/langscan/ruby.rb +164 -0
- data/lib/langscan/ruby/compat/README +5 -0
- data/lib/langscan/ruby/compat/ripper.rb +4 -0
- data/lib/langscan/ruby/compat/ripper.so +0 -0
- data/lib/langscan/ruby/compat/ripper/core.rb +918 -0
- data/lib/langscan/ruby/compat/ripper/filter.rb +70 -0
- data/lib/langscan/ruby/compat/ripper/lexer.rb +179 -0
- data/lib/langscan/ruby/compat/ripper/sexp.rb +100 -0
- data/lib/langscan/scheme.rb +160 -0
- data/lib/langscan/scheme/scheme.so +0 -0
- data/lib/langscan/sh.rb +116 -0
- data/lib/langscan/sh/sh.so +0 -0
- data/lib/langscan/text.rb +37 -0
- data/metaconfig +2 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/makemanifest.rb +21 -0
- data/setup.rb +1604 -0
- data/tasks/extconf.rake +13 -0
- data/tasks/extconf/langscan.rake +42 -0
- data/test/langscan/brainfuck/test/test_scan.rb +55 -0
- data/test/langscan/c/test/test_scan.rb +216 -0
- data/test/langscan/c/test/test_token.rb +41 -0
- data/test/langscan/csharp/test/test_scan.rb +157 -0
- data/test/langscan/css/test/test_css.rb +79 -0
- data/test/langscan/d/test/test_scan.rb +233 -0
- data/test/langscan/d/test/test_token.rb +205 -0
- data/test/langscan/eiffel/test/test_eiffel.rb +95 -0
- data/test/langscan/elisp/test/test_elisp.rb +177 -0
- data/test/langscan/io/test/test_io.rb +79 -0
- data/test/langscan/java/test/test_java.rb +74 -0
- data/test/langscan/javascript/test/test_javascript.rb +39 -0
- data/test/langscan/lua/test/test_lua.rb +69 -0
- data/test/langscan/ocaml/test/test_ocaml.rb +161 -0
- data/test/langscan/php/test/test_scan.rb +138 -0
- data/test/langscan/python/test/test_scan.rb +105 -0
- data/test/langscan/rpmspec/test/test_rpmspec.rb +51 -0
- data/test/langscan/ruby/test/test_scan.rb +71 -0
- data/test/langscan/scheme/test/test_scan.rb +198 -0
- data/test/test_helper.rb +7 -0
- data/test/test_langscan.rb +123 -0
- metadata +320 -0
@@ -0,0 +1,147 @@
|
|
1
|
+
/*
|
2
|
+
* javascript.l - a lex rule for JavaScript
|
3
|
+
*
|
4
|
+
* Copyright (C) 2005 Keisuke Nishida <knishida@open-cobol.org>
|
5
|
+
* All rights reserved.
|
6
|
+
* This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
*
|
8
|
+
* You can redistribute it and/or modify it under the terms of
|
9
|
+
* the GNU General Public License version 2.
|
10
|
+
*/
|
11
|
+
|
12
|
+
%option reentrant
|
13
|
+
%option prefix="langscan_javascript_lex_"
|
14
|
+
%option noyywrap
|
15
|
+
%option nodefault
|
16
|
+
|
17
|
+
slash \/
|
18
|
+
star \*
|
19
|
+
nonstar [^\*]
|
20
|
+
nonslashstar [^\/\*]
|
21
|
+
commentcontent {star}+{nonslashstar}{nonstar}*
|
22
|
+
comment {slash}{star}{nonstar}*{commentcontent}*{star}+{slash}
|
23
|
+
|
24
|
+
%{
|
25
|
+
|
26
|
+
#include "javascript.h"
|
27
|
+
|
28
|
+
#define YY_EXTRA_TYPE langscan_javascript_lex_extra_t *
|
29
|
+
|
30
|
+
#if YY_NULL != 0
|
31
|
+
#error "YY_NULL is not 0."
|
32
|
+
#endif
|
33
|
+
|
34
|
+
#define YY_DECL langscan_javascript_token_t langscan_javascript_lex_lex(yyscan_t yyscanner)
|
35
|
+
|
36
|
+
/*
 * YY_INPUT - refill the scanner buffer through the user-supplied callback.
 *
 * buf      : destination buffer provided by flex
 * result   : lvalue that receives the number of bytes stored in buf
 * max_size : capacity of buf
 *
 * A read that yields 0 bytes latches yyextra->eof so the callback is never
 * invoked again.  Once the flag is latched, result is explicitly set to 0:
 * flex treats result as "bytes now available", so leaving it unassigned
 * would let the scanner act on a stale count.
 */
#define YY_INPUT(buf,result,max_size) \
  if (!yyextra->eof) { \
    result = yyextra->user_read(&(yyextra->user_data), (buf), (max_size)); \
    if (result == 0) \
      yyextra->eof = 1; \
  } \
  else { \
    result = 0; \
  }
|
42
|
+
|
43
|
+
#define UPD update_pos(yyextra, yytext, yyleng)
|
44
|
+
static void update_pos(langscan_javascript_lex_extra_t *, char *, int);
|
45
|
+
|
46
|
+
#define report(token) \
|
47
|
+
do { \
|
48
|
+
yyextra->text = yytext; \
|
49
|
+
yyextra->leng = yyleng; \
|
50
|
+
return langscan_javascript_##token; \
|
51
|
+
} while (0)
|
52
|
+
|
53
|
+
%}
|
54
|
+
|
55
|
+
%%
|
56
|
+
[ \t\f\r]+ { UPD; report(space); }
|
57
|
+
\n { UPD; report(space); }
|
58
|
+
"//".* { UPD; report(comment); }
|
59
|
+
{comment} { UPD; report(comment); }
|
60
|
+
\"([^\\\"]|\\.)*\" { UPD; report(string); }
|
61
|
+
[A-Za-z_][0-9A-Za-z_]* { UPD; report(ident); }
|
62
|
+
. { UPD; report(punct); }
|
63
|
+
|
64
|
+
%%
|
65
|
+
|
66
|
+
static void update_pos(
|
67
|
+
langscan_javascript_lex_extra_t *extra,
|
68
|
+
char *text,
|
69
|
+
int leng)
|
70
|
+
{
|
71
|
+
int i, j;
|
72
|
+
extra->beg_byteno = extra->end_byteno;
|
73
|
+
extra->beg_lineno = extra->end_lineno;
|
74
|
+
extra->beg_columnno = extra->end_columnno;
|
75
|
+
j = 0;
|
76
|
+
for (i = 0; i < leng; i++) {
|
77
|
+
if (text[i] == '\n') {
|
78
|
+
extra->end_lineno++;
|
79
|
+
j = i + 1;
|
80
|
+
extra->end_columnno = 0;
|
81
|
+
}
|
82
|
+
}
|
83
|
+
extra->end_columnno += leng - j;
|
84
|
+
extra->end_byteno += leng;
|
85
|
+
}
|
86
|
+
|
87
|
+
langscan_javascript_tokenizer_t *langscan_javascript_make_tokenizer(
|
88
|
+
size_t (*user_read)(void **user_data_p, char *buf, size_t maxlen),
|
89
|
+
void *user_data)
|
90
|
+
{
|
91
|
+
langscan_javascript_tokenizer_t *tokenizer;
|
92
|
+
langscan_javascript_lex_extra_t *extra;
|
93
|
+
tokenizer = (langscan_javascript_tokenizer_t *)malloc(sizeof(langscan_javascript_tokenizer_t));
|
94
|
+
if (tokenizer == NULL)
|
95
|
+
return NULL;
|
96
|
+
extra = (langscan_javascript_lex_extra_t *)malloc(sizeof(langscan_javascript_lex_extra_t));
|
97
|
+
if (extra == NULL)
|
98
|
+
return NULL;
|
99
|
+
extra->user_read = user_read;
|
100
|
+
extra->user_data = user_data;
|
101
|
+
extra->beg_lineno = 1;
|
102
|
+
extra->beg_columnno = 0;
|
103
|
+
extra->beg_byteno = 0;
|
104
|
+
extra->end_lineno = 1;
|
105
|
+
extra->end_columnno = 0;
|
106
|
+
extra->end_byteno = 0;
|
107
|
+
extra->eof = 0;
|
108
|
+
tokenizer->extra = extra;
|
109
|
+
langscan_javascript_lex_lex_init(&tokenizer->scanner);
|
110
|
+
langscan_javascript_lex_set_extra(extra, tokenizer->scanner);
|
111
|
+
return tokenizer;
|
112
|
+
}
|
113
|
+
|
114
|
+
/*
 * langscan_javascript_get_token - run the scanner once and return the token
 * it reports.
 */
langscan_javascript_token_t langscan_javascript_get_token(langscan_javascript_tokenizer_t *tokenizer)
{
    yyscan_t scanner = tokenizer->scanner;

    return langscan_javascript_lex_lex(scanner);
}
|
118
|
+
|
119
|
+
/*
 * langscan_javascript_free_tokenizer - release a tokenizer: first the extra
 * record attached to the scanner, then the scanner itself, then the
 * tokenizer structure.
 */
void langscan_javascript_free_tokenizer(langscan_javascript_tokenizer_t *tokenizer)
{
    yyscan_t scanner = tokenizer->scanner;

    /* The extra record must be fetched before the scanner is destroyed. */
    free((void *)langscan_javascript_lex_get_extra(scanner));
    langscan_javascript_lex_lex_destroy(scanner);
    free((void *)tokenizer);
}
|
126
|
+
|
127
|
+
/*
 * langscan_javascript_tokenizer_get_user_read - return the read callback
 * stored in the tokenizer's extra record.
 */
user_read_t langscan_javascript_tokenizer_get_user_read(langscan_javascript_tokenizer_t *tokenizer)
{
    langscan_javascript_lex_extra_t *extra = tokenizer->extra;

    return extra->user_read;
}
|
131
|
+
|
132
|
+
/*
 * langscan_javascript_tokenizer_get_user_data - return the opaque user data
 * pointer stored in the tokenizer's extra record.
 */
void *langscan_javascript_tokenizer_get_user_data(langscan_javascript_tokenizer_t *tokenizer)
{
    langscan_javascript_lex_extra_t *extra = tokenizer->extra;

    return extra->user_data;
}
|
136
|
+
|
137
|
+
/*
 * langscan_javascript_token_name - map a token code to its printable name.
 *
 * Index 0 is "*eof*"; the remaining names are generated from
 * LANGSCAN_JAVASCRIPT_TOKEN_LIST by stringifying each entry.
 *
 * Returns a pointer to a static string, or NULL when token does not index
 * the table (previously such a value read outside the array).
 */
const char *langscan_javascript_token_name(langscan_javascript_token_t token)
{
    static const char *const token_names[] = {
        "*eof*",
#define LANGSCAN_JAVASCRIPT_TOKEN(name) #name,
        LANGSCAN_JAVASCRIPT_TOKEN_LIST
#undef LANGSCAN_JAVASCRIPT_TOKEN
    };
    /* Negative codes become huge after the cast and fail the range test. */
    size_t index = (size_t)token;

    if (index >= sizeof(token_names) / sizeof(token_names[0]))
        return NULL;
    return token_names[index];
}
|
@@ -0,0 +1 @@
|
|
1
|
+
JavaScript
|
@@ -0,0 +1,188 @@
|
|
1
|
+
|
2
|
+
SHELL = /bin/sh
|
3
|
+
|
4
|
+
#### Start of system configuration section. ####
|
5
|
+
|
6
|
+
srcdir = .
|
7
|
+
topdir = C:/usr/ruby/lib/ruby/1.8/i386-mswin32
|
8
|
+
hdrdir = $(topdir)
|
9
|
+
VPATH = $(srcdir);$(topdir);$(hdrdir)
|
10
|
+
|
11
|
+
DESTDIR = C:
|
12
|
+
exec_prefix = $(prefix)
|
13
|
+
prefix = $(DESTDIR)/usr/ruby
|
14
|
+
sharedstatedir = $(DESTDIR)/etc
|
15
|
+
mandir = $(prefix)/man
|
16
|
+
oldincludedir = $(DESTDIR)/usr/include
|
17
|
+
bindir = $(exec_prefix)/bin
|
18
|
+
libexecdir = $(exec_prefix)/libexec
|
19
|
+
sitedir = $(prefix)/lib/ruby/site_ruby
|
20
|
+
vendorarchdir = $(vendorlibdir)/$(sitearch)
|
21
|
+
includedir = $(prefix)/include
|
22
|
+
infodir = $(prefix)/info
|
23
|
+
vendorlibdir = $(vendordir)/$(ruby_version)
|
24
|
+
sysconfdir = $(prefix)/etc
|
25
|
+
libdir = $(exec_prefix)/lib
|
26
|
+
sbindir = $(exec_prefix)/sbin
|
27
|
+
rubylibdir = $(libdir)/ruby/$(ruby_version)
|
28
|
+
vendordir = $(prefix)/lib/ruby/vendor_ruby
|
29
|
+
archdir = $(rubylibdir)/$(arch)
|
30
|
+
sitearchdir = $(sitelibdir)/$(sitearch)
|
31
|
+
datadir = $(prefix)/share
|
32
|
+
localstatedir = $(DESTDIR)/var
|
33
|
+
sitelibdir = $(sitedir)/$(ruby_version)
|
34
|
+
|
35
|
+
CC = cl -nologo
|
36
|
+
LIBRUBY = $(RUBY_SO_NAME).lib
|
37
|
+
LIBRUBY_A = $(RUBY_SO_NAME)-static.lib
|
38
|
+
LIBRUBYARG_SHARED = $(LIBRUBY)
|
39
|
+
LIBRUBYARG_STATIC = $(LIBRUBY_A)
|
40
|
+
|
41
|
+
RUBY_EXTCONF_H =
|
42
|
+
CFLAGS = -MT -Zi -O2b2xg- -G6
|
43
|
+
INCFLAGS = -I. -I$(topdir) -I$(hdrdir) -I$(srcdir)
|
44
|
+
DEFS =
|
45
|
+
CPPFLAGS =
|
46
|
+
CXXFLAGS = $(CFLAGS)
|
47
|
+
ldflags =
|
48
|
+
dldflags = -link -incremental:no -debug -opt:ref -opt:icf -dll $(LIBPATH)
|
49
|
+
archflag =
|
50
|
+
DLDFLAGS = $(ldflags) $(dldflags) $(archflag)
|
51
|
+
LDSHARED = cl -nologo -LD
|
52
|
+
AR = lib -nologo
|
53
|
+
EXEEXT = .exe
|
54
|
+
|
55
|
+
RUBY_INSTALL_NAME = ruby
|
56
|
+
RUBY_SO_NAME = msvcrt-ruby18
|
57
|
+
arch = i386-mswin32
|
58
|
+
sitearch = i386-msvcrt
|
59
|
+
ruby_version = 1.8
|
60
|
+
ruby = C:/usr/ruby/bin/ruby
|
61
|
+
RUBY = $(ruby:/=\)
|
62
|
+
RM = $(RUBY) -run -e rm -- -f
|
63
|
+
MAKEDIRS = @$(RUBY) -run -e mkdir -- -p
|
64
|
+
INSTALL = @$(RUBY) -run -e install -- -vp
|
65
|
+
INSTALL_PROG = $(INSTALL) -m 0755
|
66
|
+
INSTALL_DATA = $(INSTALL) -m 0644
|
67
|
+
COPY = copy > nul
|
68
|
+
|
69
|
+
#### End of system configuration section. ####
|
70
|
+
|
71
|
+
preload =
|
72
|
+
|
73
|
+
libpath = . $(libdir)
|
74
|
+
LIBPATH = -libpath:"." -libpath:"$(libdir)"
|
75
|
+
DEFFILE = $(TARGET)-$(arch).def
|
76
|
+
|
77
|
+
CLEANFILES = mkmf.log
|
78
|
+
DISTCLEANFILES = vc*.pdb $(DEFFILE)
|
79
|
+
|
80
|
+
extout =
|
81
|
+
extout_prefix =
|
82
|
+
target_prefix = /langscan/pairmatcher
|
83
|
+
LOCAL_LIBS =
|
84
|
+
LIBS = $(LIBRUBYARG_SHARED) oldnames.lib user32.lib advapi32.lib shell32.lib ws2_32.lib
|
85
|
+
SRCS = pairmatcher.c
|
86
|
+
OBJS = pairmatcher.obj
|
87
|
+
TARGET = pairmatcher
|
88
|
+
DLLIB = $(TARGET).so
|
89
|
+
EXTSTATIC =
|
90
|
+
STATIC_LIB =
|
91
|
+
|
92
|
+
BINDIR = $(bindir)
|
93
|
+
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
94
|
+
RUBYLIBDIR = C:/devel/gem/git/langscan/pkg/langscan-1.2.gem.build/lib$(target_prefix)
|
95
|
+
RUBYARCHDIR = C:/devel/gem/git/langscan/pkg/langscan-1.2.gem.build/lib$(target_prefix)
|
96
|
+
|
97
|
+
TARGET_SO = $(DLLIB)
|
98
|
+
CLEANLIBS = $(TARGET).so $(TARGET).il? $(TARGET).tds $(TARGET).map
|
99
|
+
CLEANOBJS = *.obj *.lib *.s[ol] *.pdb *.exp *.bak
|
100
|
+
|
101
|
+
all: $(DLLIB)
|
102
|
+
static: $(STATIC_LIB)
|
103
|
+
|
104
|
+
clean:
|
105
|
+
@-$(RM) $(CLEANLIBS:/=\) $(CLEANOBJS:/=\) $(CLEANFILES:/=\)
|
106
|
+
|
107
|
+
distclean: clean
|
108
|
+
@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
109
|
+
@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES:/=\)
|
110
|
+
|
111
|
+
realclean: distclean
|
112
|
+
install: install-so install-rb
|
113
|
+
|
114
|
+
install-so: $(RUBYARCHDIR)
|
115
|
+
install-so: $(RUBYARCHDIR)/$(DLLIB)
|
116
|
+
$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
|
117
|
+
$(INSTALL_PROG) $(DLLIB:/=\) $(RUBYARCHDIR:/=\)
|
118
|
+
install-rb: pre-install-rb install-rb-default
|
119
|
+
install-rb-default: pre-install-rb-default
|
120
|
+
pre-install-rb: Makefile
|
121
|
+
pre-install-rb-default: Makefile
|
122
|
+
$(RUBYARCHDIR):
|
123
|
+
$(MAKEDIRS) $@
|
124
|
+
|
125
|
+
site-install: site-install-so site-install-rb
|
126
|
+
site-install-so: install-so
|
127
|
+
site-install-rb: install-rb
|
128
|
+
|
129
|
+
.SUFFIXES: .c .m .cc .cxx .cpp .obj
|
130
|
+
|
131
|
+
{$(hdrdir)}.cc{}.obj:
|
132
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
133
|
+
|
134
|
+
{$(topdir)}.cc{}.obj:
|
135
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
136
|
+
|
137
|
+
{$(srcdir)}.cc{}.obj:
|
138
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
139
|
+
|
140
|
+
.cc.obj:
|
141
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
142
|
+
|
143
|
+
{$(hdrdir)}.cxx{}.obj:
|
144
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
145
|
+
|
146
|
+
{$(topdir)}.cxx{}.obj:
|
147
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
148
|
+
|
149
|
+
{$(srcdir)}.cxx{}.obj:
|
150
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
151
|
+
|
152
|
+
.cxx.obj:
|
153
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
154
|
+
|
155
|
+
{$(hdrdir)}.cpp{}.obj:
|
156
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
157
|
+
|
158
|
+
{$(topdir)}.cpp{}.obj:
|
159
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
160
|
+
|
161
|
+
{$(srcdir)}.cpp{}.obj:
|
162
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
163
|
+
|
164
|
+
.cpp.obj:
|
165
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
166
|
+
|
167
|
+
{$(hdrdir)}.c{}.obj:
|
168
|
+
$(CC) $(INCFLAGS) $(CFLAGS) $(CPPFLAGS) -c -Tc$(<:\=/)
|
169
|
+
|
170
|
+
{$(topdir)}.c{}.obj:
|
171
|
+
$(CC) $(INCFLAGS) $(CFLAGS) $(CPPFLAGS) -c -Tc$(<:\=/)
|
172
|
+
|
173
|
+
{$(srcdir)}.c{}.obj:
|
174
|
+
$(CC) $(INCFLAGS) $(CFLAGS) $(CPPFLAGS) -c -Tc$(<:\=/)
|
175
|
+
|
176
|
+
.c.obj:
|
177
|
+
$(CC) $(INCFLAGS) $(CFLAGS) $(CPPFLAGS) -c -Tc$(<:\=/)
|
178
|
+
|
179
|
+
$(DLLIB): $(DEFFILE) $(OBJS) Makefile
|
180
|
+
@-$(RM) $@
|
181
|
+
$(LDSHARED) -Fe$(@) $(OBJS) $(LIBS) $(LOCAL_LIBS) $(DLDFLAGS) -implib:$(*F:.so=)-$(arch).lib -pdb:$(*F:.so=)-$(arch).pdb -def:$(DEFFILE)
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
$(DEFFILE):
|
186
|
+
$(RUBY) -e "puts 'EXPORTS', 'Init_$(TARGET)'" > $@
|
187
|
+
|
188
|
+
$(OBJS): {.;$(VPATH)}ruby.h {.;$(VPATH)}defines.h
|
@@ -0,0 +1,890 @@
|
|
1
|
+
/*
|
2
|
+
* pairmatcher.c - a pair matching parser
|
3
|
+
*
|
4
|
+
* Copyright (C) 2005 Akira Tanaka <akr@m17n.org>
|
5
|
+
* All rights reserved.
|
6
|
+
* This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
*
|
8
|
+
* You can redistribute it and/or modify it under the terms of
|
9
|
+
* the GNU General Public License version 2.
|
10
|
+
*/
|
11
|
+
|
12
|
+
#include <ruby.h>
|
13
|
+
|
14
|
+
static ID id_get_token, id_new, id_call;
|
15
|
+
static VALUE Fragment;
|
16
|
+
|
17
|
+
#ifndef RSTRUCT_PTR
|
18
|
+
# define RSTRUCT_PTR(st) (RSTRUCT(st)->ptr)
|
19
|
+
#endif
|
20
|
+
#ifndef RSTRUCT_LEN
|
21
|
+
# define RSTRUCT_LEN(st) (RSTRUCT(st)->len)
|
22
|
+
#endif
|
23
|
+
|
24
|
+
#ifndef RARRAY_PTR
|
25
|
+
# define RARRAY_PTR(str) (RARRAY(str)->ptr)
|
26
|
+
#endif
|
27
|
+
#ifndef RARRAY_LEN
|
28
|
+
# define RARRAY_LEN(str) (RARRAY(str)->len)
|
29
|
+
#endif
|
30
|
+
|
31
|
+
#ifdef SYMBOL_P
|
32
|
+
# define Check_Symbol(val) do { if (!SYMBOL_P(val)) { Check_Type(val, T_SYMBOL); } } while (0)
|
33
|
+
#else
|
34
|
+
# define Check_Symbol(val) Check_Type(val, T_SYMBOL)
|
35
|
+
#endif
|
36
|
+
|
37
|
+
static VALUE
|
38
|
+
fragment_type(VALUE fragment)
|
39
|
+
{
|
40
|
+
VALUE val;
|
41
|
+
Check_Type(fragment, T_STRUCT);
|
42
|
+
if (RBASIC(fragment)->klass != Fragment) {
|
43
|
+
rb_raise(rb_eTypeError, "not fragment");
|
44
|
+
}
|
45
|
+
val = RSTRUCT_PTR(fragment)[0];
|
46
|
+
Check_Symbol(val);
|
47
|
+
return val;
|
48
|
+
}
|
49
|
+
|
50
|
+
static VALUE
|
51
|
+
fragment_text(VALUE fragment)
|
52
|
+
{
|
53
|
+
VALUE val;
|
54
|
+
Check_Type(fragment, T_STRUCT);
|
55
|
+
if (RBASIC(fragment)->klass != Fragment) {
|
56
|
+
rb_raise(rb_eTypeError, "not fragment");
|
57
|
+
}
|
58
|
+
val = RSTRUCT_PTR(fragment)[1];
|
59
|
+
StringValue(val);
|
60
|
+
return val;
|
61
|
+
}
|
62
|
+
|
63
|
+
static int
|
64
|
+
fragment_byteno(VALUE fragment)
|
65
|
+
{
|
66
|
+
VALUE val;
|
67
|
+
Check_Type(fragment, T_STRUCT);
|
68
|
+
if (RBASIC(fragment)->klass != Fragment) {
|
69
|
+
rb_raise(rb_eTypeError, "not fragment");
|
70
|
+
}
|
71
|
+
val = RSTRUCT_PTR(fragment)[3];
|
72
|
+
return NUM2INT(val);
|
73
|
+
}
|
74
|
+
|
75
|
+
typedef struct {
|
76
|
+
unsigned char before_open_max;
|
77
|
+
unsigned char after_open_max;
|
78
|
+
unsigned char before_close_max;
|
79
|
+
unsigned char after_close_max;
|
80
|
+
VALUE pair_defs;
|
81
|
+
VALUE intertoken_defs;
|
82
|
+
VALUE recent_tokens;
|
83
|
+
VALUE pair_stack;
|
84
|
+
VALUE closed_pairs;
|
85
|
+
} pairmatcher_t;
|
86
|
+
|
87
|
+
/*
 * pairmatcher_mark - GC mark callback: marks every Ruby value held by the
 * matcher.  The pointer is NULL until pairmatcher_initialize has run.
 */
static void pairmatcher_mark(pairmatcher_t *pairmatcher)
{
    if (pairmatcher != NULL) {
        rb_gc_mark(pairmatcher->pair_defs);
        rb_gc_mark(pairmatcher->intertoken_defs);
        rb_gc_mark(pairmatcher->recent_tokens);
        rb_gc_mark(pairmatcher->pair_stack);
        rb_gc_mark(pairmatcher->closed_pairs);
    }
}
|
97
|
+
|
98
|
+
/*
 * pairmatcher_free - GC free callback for the wrapped pairmatcher_t.
 *
 * The struct is obtained with Ruby's ALLOC() in pairmatcher_initialize, so
 * it has to be returned through Ruby's xfree() rather than the C runtime's
 * free(): the extension and the interpreter may be linked against different
 * C runtimes and therefore different heaps.  The pointer is NULL when the
 * object was allocated but never initialized.
 */
static void pairmatcher_free(pairmatcher_t *pairmatcher)
{
    if (pairmatcher == NULL)
        return;
    xfree(pairmatcher);
}
|
104
|
+
|
105
|
+
/*
 * pairmatcher_s_allocate - allocator: wraps a NULL data pointer with the
 * mark/free callbacks.  The actual state is attached later by
 * pairmatcher_initialize.
 */
static VALUE pairmatcher_s_allocate(VALUE klass)
{
    VALUE obj = Data_Wrap_Struct(klass, pairmatcher_mark, pairmatcher_free, NULL);

    return obj;
}
|
109
|
+
|
110
|
+
/*
 * Convert one of the *_max arguments to the unsigned char it is stored in.
 * Raises ArgumentError when the value does not fit instead of truncating it.
 */
static unsigned char
pairmatcher_context_max(VALUE num, const char *name)
{
    int n = NUM2INT(num);

    if (n < 0 || (int)(unsigned char)n != n) {
        rb_raise(rb_eArgError, "%s out of range: %d", name, n);
    }
    return (unsigned char)n;
}

/*
 * pairmatcher_initialize - attach freshly allocated state to self.
 *
 * The four arguments are the context limits stored in pairmatcher_t; each
 * must be an integer that fits in an unsigned char.
 *
 * Raises ArgumentError ("called twice") when self already carries state, and
 * whatever the argument conversion raises for unusable limits.  All
 * conversions happen before anything is attached to self, so a failing call
 * leaves the object uninitialized rather than half set up.
 *
 * Returns self.
 */
static VALUE pairmatcher_initialize(
    VALUE self,
    VALUE before_open_max,
    VALUE after_open_max,
    VALUE before_close_max,
    VALUE after_close_max)
{
    pairmatcher_t *pairmatcher;
    unsigned char before_open, after_open, before_close, after_close;

    Data_Get_Struct(self, pairmatcher_t, pairmatcher);
    if (pairmatcher != NULL) { rb_raise(rb_eArgError, "called twice"); }

    before_open = pairmatcher_context_max(before_open_max, "before_open_max");
    after_open = pairmatcher_context_max(after_open_max, "after_open_max");
    before_close = pairmatcher_context_max(before_close_max, "before_close_max");
    after_close = pairmatcher_context_max(after_close_max, "after_close_max");

    pairmatcher = ALLOC(pairmatcher_t);
    pairmatcher->before_open_max = before_open;
    pairmatcher->after_open_max = after_open;
    pairmatcher->before_close_max = before_close;
    pairmatcher->after_close_max = after_close;
    /*
     * Give every VALUE member a markable value before the struct becomes
     * reachable from self: the allocations below may trigger a GC run that
     * calls pairmatcher_mark on it.
     */
    pairmatcher->pair_defs = Qnil;
    pairmatcher->intertoken_defs = Qnil;
    pairmatcher->recent_tokens = Qnil;
    pairmatcher->pair_stack = Qnil;
    pairmatcher->closed_pairs = Qnil;
    DATA_PTR(self) = pairmatcher;

    pairmatcher->pair_defs = rb_ary_new();
    pairmatcher->intertoken_defs = rb_ary_new();
    pairmatcher->recent_tokens = rb_ary_new();
    pairmatcher->pair_stack = rb_ary_new();
    pairmatcher->closed_pairs = rb_ary_new();
    return self;
}
|
146
|
+
|
147
|
+
/*
 * GetPM - load the pairmatcher_t wrapped by obj into var, raising
 * ArgumentError ("not initialized") when no state has been attached yet.
 */
#define GetPM(obj, var) \
  do { \
    Data_Get_Struct((obj), pairmatcher_t, (var)); \
    if (!(var)) \
      rb_raise(rb_eArgError, "not initialized"); \
  } while (0)
|
152
|
+
|
153
|
+
static VALUE
|
154
|
+
pairmatcher_get_before_open_max(VALUE self)
|
155
|
+
{
|
156
|
+
pairmatcher_t *pairmatcher;
|
157
|
+
GetPM(self, pairmatcher);
|
158
|
+
if (pairmatcher == NULL) { rb_raise(rb_eArgError, "not initialized"); }
|
159
|
+
return INT2NUM(pairmatcher->before_open_max);
|
160
|
+
}
|
161
|
+
|
162
|
+
static VALUE
|
163
|
+
pairmatcher_get_after_open_max(VALUE self)
|
164
|
+
{
|
165
|
+
pairmatcher_t *pairmatcher;
|
166
|
+
GetPM(self, pairmatcher);
|
167
|
+
return INT2NUM(pairmatcher->after_open_max);
|
168
|
+
}
|
169
|
+
|
170
|
+
static VALUE
|
171
|
+
pairmatcher_get_before_close_max(VALUE self)
|
172
|
+
{
|
173
|
+
pairmatcher_t *pairmatcher;
|
174
|
+
GetPM(self, pairmatcher);
|
175
|
+
return INT2NUM(pairmatcher->before_close_max);
|
176
|
+
}
|
177
|
+
|
178
|
+
static VALUE
|
179
|
+
pairmatcher_get_after_close_max(VALUE self)
|
180
|
+
{
|
181
|
+
pairmatcher_t *pairmatcher;
|
182
|
+
GetPM(self, pairmatcher);
|
183
|
+
return INT2NUM(pairmatcher->after_close_max);
|
184
|
+
}
|
185
|
+
|
186
|
+
static VALUE
|
187
|
+
pairmatcher_define_pair(VALUE self, VALUE pair_type, VALUE open_type, VALUE open_text, VALUE close_type, VALUE close_text)
|
188
|
+
{
|
189
|
+
pairmatcher_t *pairmatcher;
|
190
|
+
VALUE def;
|
191
|
+
Check_Symbol(open_type);
|
192
|
+
if (open_text != Qnil) {
|
193
|
+
StringValue(open_text);
|
194
|
+
open_text = rb_str_new4(open_text);
|
195
|
+
}
|
196
|
+
Check_Symbol(close_type);
|
197
|
+
if (close_text != Qnil) {
|
198
|
+
StringValue(close_text);
|
199
|
+
close_text = rb_str_new4(close_text);
|
200
|
+
}
|
201
|
+
|
202
|
+
def = rb_ary_new3(5, open_type, open_text, close_type, close_text, pair_type);
|
203
|
+
//RBASIC(def)->klass = 0;
|
204
|
+
|
205
|
+
GetPM(self, pairmatcher);
|
206
|
+
rb_ary_push(pairmatcher->pair_defs, def);
|
207
|
+
|
208
|
+
return Qnil;
|
209
|
+
}
|
210
|
+
|
211
|
+
static VALUE
|
212
|
+
pairmatcher_define_intertoken_fragment(VALUE self, VALUE type, VALUE text)
|
213
|
+
{
|
214
|
+
pairmatcher_t *pairmatcher;
|
215
|
+
VALUE def;
|
216
|
+
Check_Symbol(type);
|
217
|
+
if (text != Qnil) {
|
218
|
+
StringValue(text);
|
219
|
+
text = rb_str_new4(text);
|
220
|
+
}
|
221
|
+
|
222
|
+
def = rb_ary_new3(2, type, text);
|
223
|
+
//RBASIC(def)->klass = 0;
|
224
|
+
|
225
|
+
GetPM(self, pairmatcher);
|
226
|
+
rb_ary_push(pairmatcher->intertoken_defs, def);
|
227
|
+
|
228
|
+
return Qnil;
|
229
|
+
}
|
230
|
+
|
231
|
+
static VALUE
|
232
|
+
get_token(VALUE tokenizer)
|
233
|
+
{
|
234
|
+
return rb_funcall(tokenizer, id_get_token, 0);
|
235
|
+
}
|
236
|
+
|
237
|
+
static VALUE
|
238
|
+
open_token_p(pairmatcher_t *pairmatcher, VALUE token)
|
239
|
+
{
|
240
|
+
int i;
|
241
|
+
VALUE type = fragment_type(token);
|
242
|
+
VALUE text = fragment_text(token);
|
243
|
+
for (i = 0; i < RARRAY_LEN(pairmatcher->pair_defs); i++) {
|
244
|
+
VALUE def = RARRAY_PTR(pairmatcher->pair_defs)[i];
|
245
|
+
VALUE open_type = RARRAY_PTR(def)[0];
|
246
|
+
VALUE open_text = RARRAY_PTR(def)[1];
|
247
|
+
if (open_type == Qnil || open_type == type) {
|
248
|
+
if (open_text == Qnil || rb_str_cmp(open_text, text) == 0) {
|
249
|
+
return def;
|
250
|
+
}
|
251
|
+
}
|
252
|
+
}
|
253
|
+
return Qfalse;
|
254
|
+
}
|
255
|
+
|
256
|
+
static VALUE
|
257
|
+
close_token_p(pairmatcher_t *pairmatcher, VALUE token, int *i)
|
258
|
+
{
|
259
|
+
VALUE type = fragment_type(token);
|
260
|
+
VALUE text = fragment_text(token);
|
261
|
+
for (; *i < RARRAY_LEN(pairmatcher->pair_defs); (*i)++) {
|
262
|
+
VALUE def = RARRAY_PTR(pairmatcher->pair_defs)[*i];
|
263
|
+
VALUE close_type = RARRAY_PTR(def)[2];
|
264
|
+
VALUE close_text = RARRAY_PTR(def)[3];
|
265
|
+
if (close_type == Qnil || close_type == type) {
|
266
|
+
if (close_text == Qnil || rb_str_cmp(close_text, text) == 0) {
|
267
|
+
return def;
|
268
|
+
}
|
269
|
+
}
|
270
|
+
}
|
271
|
+
return Qfalse;
|
272
|
+
}
|
273
|
+
|
274
|
+
/*
 * Accessors for the members of a Pair struct:
 *   0 pair definition        1 before-open length (Fixnum)
 *   2 around-open tokens     3 before-close length (Fixnum)
 *   4 around-close tokens    5 outer pair
 */
#define pair_get_pair_def(p)                   (RSTRUCT_PTR(p)[0])
#define pair_get_before_open_len(p)            (FIX2INT(RSTRUCT_PTR(p)[1]))
#define pair_get_around_open_tokens(p)         (RSTRUCT_PTR(p)[2])
#define pair_get_before_close_len(p)           (FIX2INT(RSTRUCT_PTR(p)[3]))
#define pair_get_around_close_tokens(p)        (RSTRUCT_PTR(p)[4])
#define pair_get_outer(p)                      (RSTRUCT_PTR(p)[5])

#define pair_set_pair_def(p, val)              (RSTRUCT_PTR(p)[0] = (val))
#define pair_set_before_open_len(p, len)       (RSTRUCT_PTR(p)[1] = INT2FIX(len))
#define pair_set_around_open_tokens(p, val)    (RSTRUCT_PTR(p)[2] = (val))
#define pair_set_before_close_len(p, len)      (RSTRUCT_PTR(p)[3] = INT2FIX(len))
#define pair_set_around_close_tokens(p, val)   (RSTRUCT_PTR(p)[4] = (val))

/* Tokens stored after the open/close token: total - before - the token itself. */
#define pair_get_after_open_len(p) \
  (RARRAY_LEN(pair_get_around_open_tokens(p)) - pair_get_before_open_len(p) - 1)
#define pair_get_after_close_len(p) \
  (RARRAY_LEN(pair_get_around_close_tokens(p)) - pair_get_before_close_len(p) - 1)
|
288
|
+
|
289
|
+
static VALUE Pair;
|
290
|
+
|
291
|
+
static VALUE
|
292
|
+
make_pair(VALUE pair_def, int before_open_len, VALUE around_open_tokens, VALUE outer)
|
293
|
+
{
|
294
|
+
VALUE pair = rb_struct_new(Pair,
|
295
|
+
pair_def,
|
296
|
+
INT2FIX(before_open_len),
|
297
|
+
around_open_tokens,
|
298
|
+
Qnil,
|
299
|
+
Qnil,
|
300
|
+
outer);
|
301
|
+
return pair;
|
302
|
+
}
|
303
|
+
|
304
|
+
static int
|
305
|
+
concat_recent_tokens(pairmatcher_t *pm, int max, VALUE ary)
|
306
|
+
{
|
307
|
+
int i;
|
308
|
+
if (RARRAY_LEN(pm->recent_tokens) <= max)
|
309
|
+
max = RARRAY_LEN(pm->recent_tokens);
|
310
|
+
for (i = 0; i < max; i++) {
|
311
|
+
rb_ary_push(ary, RARRAY_PTR(pm->recent_tokens)[RARRAY_LEN(pm->recent_tokens)-max+i]);
|
312
|
+
}
|
313
|
+
return max;
|
314
|
+
}
|
315
|
+
|
316
|
+
static void
|
317
|
+
put_open_token(pairmatcher_t *pm, VALUE open_token, VALUE pair_def)
|
318
|
+
{
|
319
|
+
int before_open_len, stack_len;
|
320
|
+
VALUE pair;
|
321
|
+
VALUE around_open_tokens;
|
322
|
+
around_open_tokens = rb_ary_new2(pm->before_open_max+1+pm->after_open_max);
|
323
|
+
before_open_len = concat_recent_tokens(pm, pm->before_open_max, around_open_tokens);
|
324
|
+
rb_ary_push(around_open_tokens, open_token);
|
325
|
+
stack_len = RARRAY_LEN(pm->pair_stack);
|
326
|
+
pair = make_pair(pair_def, before_open_len, around_open_tokens,
|
327
|
+
stack_len ? RARRAY_PTR(pm->pair_stack)[stack_len-1] : Qnil);
|
328
|
+
rb_ary_push(pm->pair_stack, pair);
|
329
|
+
}
|
330
|
+
|
331
|
+
static int
|
332
|
+
matching_open_depth(pairmatcher_t *pm, VALUE open_token, VALUE pair_def)
|
333
|
+
{
|
334
|
+
int i;
|
335
|
+
for (i = RARRAY_LEN(pm->pair_stack) - 1; 0 <= i; i--) {
|
336
|
+
if (pair_get_pair_def(RARRAY_PTR(pm->pair_stack)[i]) == pair_def) {
|
337
|
+
return i;
|
338
|
+
}
|
339
|
+
}
|
340
|
+
return -1;
|
341
|
+
}
|
342
|
+
|
343
|
+
static void
|
344
|
+
report_token_list_now(pairmatcher_t *pm, VALUE reporter, VALUE token_list, int beg, int len)
|
345
|
+
{
|
346
|
+
while (len) {
|
347
|
+
rb_funcall(reporter, id_call, 1, RARRAY_PTR(token_list)[beg]);
|
348
|
+
beg++;
|
349
|
+
len--;
|
350
|
+
}
|
351
|
+
}
|
352
|
+
|
353
|
+
static void
|
354
|
+
report_token_list_open_pair(pairmatcher_t *pm, VALUE reporter, VALUE token_list, int beg, int len)
|
355
|
+
{
|
356
|
+
int i;
|
357
|
+
for (i = RARRAY_LEN(pm->pair_stack) - 1; 0 <= i; i--) {
|
358
|
+
VALUE pair = RARRAY_PTR(pm->pair_stack)[i];
|
359
|
+
VALUE around_open = pair_get_around_open_tokens(pair);
|
360
|
+
int first_byteno = fragment_byteno(RARRAY_PTR(around_open)[0]);
|
361
|
+
int last_byteno = fragment_byteno(RARRAY_PTR(around_open)[RARRAY_LEN(around_open)-1]);
|
362
|
+
if (last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg])) {
|
363
|
+
report_token_list_now(pm, reporter, token_list, beg, len);
|
364
|
+
return;
|
365
|
+
}
|
366
|
+
/* last_byteno >= fragment_byteno(RARRAY_PTR(token_list)[beg]) */
|
367
|
+
if (fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) < first_byteno)
|
368
|
+
continue;
|
369
|
+
/*
|
370
|
+
* fragment_byteno(RARRAY_PTR(token_list)[beg]) <= last_byteno
|
371
|
+
* first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1])
|
372
|
+
*/
|
373
|
+
if (last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg+len-1])) {
|
374
|
+
int beg2 = beg+len-1;
|
375
|
+
int len2 = 1;
|
376
|
+
while (beg <= beg2 && last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg2-1])) {
|
377
|
+
beg2--;
|
378
|
+
len2++;
|
379
|
+
}
|
380
|
+
report_token_list_now(pm, reporter, token_list, beg2, len2);
|
381
|
+
len -= len2;
|
382
|
+
}
|
383
|
+
/*
|
384
|
+
* first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) <= last_byteno
|
385
|
+
* fragment_byteno(RARRAY_PTR(token_list)[beg]) <= last_byteno
|
386
|
+
*/
|
387
|
+
while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1])) {
|
388
|
+
len--;
|
389
|
+
}
|
390
|
+
if (len == 0)
|
391
|
+
return;
|
392
|
+
}
|
393
|
+
report_token_list_now(pm, reporter, token_list, beg, len);
|
394
|
+
}
|
395
|
+
|
396
|
+
static void
|
397
|
+
report_token_list_rec_closed_pair(pairmatcher_t *pm, VALUE reporter, VALUE token_list, int beg, int len, int closed_pairs_index)
|
398
|
+
{
|
399
|
+
VALUE pair;
|
400
|
+
VALUE around_close, around_open, first, last;
|
401
|
+
int first_byteno, last_byteno;
|
402
|
+
if (RARRAY_LEN(pm->closed_pairs) <= closed_pairs_index) {
|
403
|
+
report_token_list_open_pair(pm, reporter, token_list, beg, len);
|
404
|
+
return;
|
405
|
+
}
|
406
|
+
pair = RARRAY_PTR(pm->closed_pairs)[closed_pairs_index];
|
407
|
+
if (pair == Qnil) {
|
408
|
+
report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
|
409
|
+
return;
|
410
|
+
}
|
411
|
+
around_close = pair_get_around_close_tokens(pair);
|
412
|
+
first = RARRAY_PTR(around_close)[0];
|
413
|
+
first_byteno = fragment_byteno(first);
|
414
|
+
while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]))
|
415
|
+
len--;
|
416
|
+
if (len == 0)
|
417
|
+
return;
|
418
|
+
around_open = pair_get_around_open_tokens(pair);
|
419
|
+
first = RARRAY_PTR(around_open)[0];
|
420
|
+
first_byteno = fragment_byteno(first);
|
421
|
+
last = RARRAY_PTR(around_open)[RARRAY_LEN(around_open)-1];
|
422
|
+
last_byteno = fragment_byteno(last);
|
423
|
+
if (last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg]) ||
|
424
|
+
fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) < first_byteno) {
|
425
|
+
report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
|
426
|
+
}
|
427
|
+
else if (first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg])) {
|
428
|
+
while (0 < len && fragment_byteno(RARRAY_PTR(token_list)[beg]) <= last_byteno) {
|
429
|
+
beg++;
|
430
|
+
len--;
|
431
|
+
}
|
432
|
+
if (len)
|
433
|
+
report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
|
434
|
+
}
|
435
|
+
else if (fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) <= last_byteno) {
|
436
|
+
while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]))
|
437
|
+
len--;
|
438
|
+
if (len)
|
439
|
+
report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
|
440
|
+
}
|
441
|
+
else {
|
442
|
+
int beg1, len1;
|
443
|
+
int beg2, len2;
|
444
|
+
beg1 = beg;
|
445
|
+
len1 = 1;
|
446
|
+
while (len1 < len && fragment_byteno(RARRAY_PTR(token_list)[beg1+len1]) < first_byteno)
|
447
|
+
len1++;
|
448
|
+
beg2 = beg + len - 1;
|
449
|
+
len2 = 1;
|
450
|
+
while (beg <= beg2-1 && last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg2-1])) {
|
451
|
+
beg2--;
|
452
|
+
len2++;
|
453
|
+
}
|
454
|
+
report_token_list_rec_closed_pair(pm, reporter, token_list, beg1, len1, closed_pairs_index+1);
|
455
|
+
report_token_list_rec_closed_pair(pm, reporter, token_list, beg2, len2, closed_pairs_index+1);
|
456
|
+
}
|
457
|
+
}
|
458
|
+
|
459
|
+
static void
|
460
|
+
report_token_list(pairmatcher_t *pm, VALUE token_list, int beg, int len, VALUE reporter)
|
461
|
+
{
|
462
|
+
if (len < 0)
|
463
|
+
len = RARRAY_LEN(token_list) - beg;
|
464
|
+
if (len == 0)
|
465
|
+
return;
|
466
|
+
if (RARRAY_LEN(pm->recent_tokens) != 0) {
|
467
|
+
int first_byteno = fragment_byteno(RARRAY_PTR(pm->recent_tokens)[0]);
|
468
|
+
if (first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[0]))
|
469
|
+
return;
|
470
|
+
while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]))
|
471
|
+
len--;
|
472
|
+
}
|
473
|
+
report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, 0);
|
474
|
+
}
|
475
|
+
|
476
|
+
static void
|
477
|
+
discard_unmatched_pair(pairmatcher_t *pm, VALUE reporter)
|
478
|
+
{
|
479
|
+
VALUE pair = rb_ary_pop(pm->pair_stack);
|
480
|
+
report_token_list(pm, pair_get_around_open_tokens(pair), 0, -1, reporter);
|
481
|
+
}
|
482
|
+
|
483
|
+
static void
|
484
|
+
put_close_token(pairmatcher_t *pm, VALUE close_token, VALUE pair_def, int depth, VALUE reporter)
|
485
|
+
{
|
486
|
+
VALUE pair, around_close_tokens;
|
487
|
+
int before_close_len;
|
488
|
+
while (depth+1 < RARRAY_LEN(pm->pair_stack)) {
|
489
|
+
discard_unmatched_pair(pm, reporter);
|
490
|
+
}
|
491
|
+
pair = rb_ary_pop(pm->pair_stack);
|
492
|
+
around_close_tokens = rb_ary_new2(pm->before_close_max+1+pm->after_close_max);
|
493
|
+
before_close_len = concat_recent_tokens(pm, pm->before_close_max, around_close_tokens);
|
494
|
+
rb_ary_push(around_close_tokens, close_token);
|
495
|
+
pair_set_before_close_len(pair, before_close_len);
|
496
|
+
pair_set_around_close_tokens(pair, around_close_tokens);
|
497
|
+
rb_ary_push(pm->closed_pairs, pair);
|
498
|
+
}
|
499
|
+
|
500
|
+
static void
|
501
|
+
add_recent(pairmatcher_t *pm, VALUE reporter, VALUE token)
|
502
|
+
{
|
503
|
+
int max = pm->before_open_max;
|
504
|
+
if (max < pm->before_close_max)
|
505
|
+
max = pm->before_close_max;
|
506
|
+
if (max == 0)
|
507
|
+
return;
|
508
|
+
if (RARRAY_LEN(pm->recent_tokens) < max) {
|
509
|
+
rb_ary_push(pm->recent_tokens, token);
|
510
|
+
}
|
511
|
+
else {
|
512
|
+
VALUE val = RARRAY_PTR(pm->recent_tokens)[0];
|
513
|
+
MEMMOVE(RARRAY_PTR(pm->recent_tokens),
|
514
|
+
RARRAY_PTR(pm->recent_tokens)+1,
|
515
|
+
VALUE, max-1);
|
516
|
+
RARRAY_PTR(pm->recent_tokens)[max-1] = token;
|
517
|
+
report_token_list(pm, rb_ary_new3(1, val), 0, -1, reporter);
|
518
|
+
}
|
519
|
+
}
|
520
|
+
|
521
|
+
static void
|
522
|
+
add_after_open(pairmatcher_t *pm, VALUE token)
|
523
|
+
{
|
524
|
+
int i;
|
525
|
+
int max = pm->after_open_max;
|
526
|
+
VALUE pair;
|
527
|
+
for (i = RARRAY_LEN(pm->pair_stack)-1; 0 <= i; i--) {
|
528
|
+
pair = RARRAY_PTR(pm->pair_stack)[i];
|
529
|
+
if (max <= pair_get_after_open_len(pair))
|
530
|
+
break;
|
531
|
+
rb_ary_push(pair_get_around_open_tokens(pair), token);
|
532
|
+
}
|
533
|
+
for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
|
534
|
+
pair = RARRAY_PTR(pm->closed_pairs)[i];
|
535
|
+
if (pair == Qnil)
|
536
|
+
continue;
|
537
|
+
if (pair_get_after_open_len(pair) < max)
|
538
|
+
rb_ary_push(pair_get_around_open_tokens(pair), token);
|
539
|
+
}
|
540
|
+
}
|
541
|
+
|
542
|
+
static void
|
543
|
+
add_after_close(pairmatcher_t *pm, VALUE token)
|
544
|
+
{
|
545
|
+
int i;
|
546
|
+
int max = pm->after_close_max;
|
547
|
+
VALUE pair;
|
548
|
+
for (i = RARRAY_LEN(pm->closed_pairs) - 1; 0 <= i; i--) {
|
549
|
+
pair = RARRAY_PTR(pm->closed_pairs)[i];
|
550
|
+
if (pair == Qnil)
|
551
|
+
continue;
|
552
|
+
if (max <= pair_get_after_close_len(pair))
|
553
|
+
break;
|
554
|
+
rb_ary_push(pair_get_around_close_tokens(pair), token);
|
555
|
+
}
|
556
|
+
}
|
557
|
+
|
558
|
+
static void
|
559
|
+
discard_matched_pair(pairmatcher_t *pm, VALUE pair, VALUE reporter)
|
560
|
+
{
|
561
|
+
VALUE around_open = pair_get_around_open_tokens(pair);
|
562
|
+
VALUE around_close = pair_get_around_close_tokens(pair);
|
563
|
+
if (fragment_byteno(RARRAY_PTR(around_close)[0]) <=
|
564
|
+
fragment_byteno(RARRAY_PTR(around_open)[RARRAY_LEN(around_open)-1])) {
|
565
|
+
int len1 = RARRAY_LEN(around_open);
|
566
|
+
while (0 < len1 && fragment_byteno(RARRAY_PTR(around_close)[0]) <= fragment_byteno(RARRAY_PTR(around_open)[len1-1]))
|
567
|
+
len1--;
|
568
|
+
if (len1) {
|
569
|
+
report_token_list(pm, around_open, 0, len1, reporter);
|
570
|
+
}
|
571
|
+
report_token_list(pm, around_close, 0, -1, reporter);
|
572
|
+
}
|
573
|
+
else {
|
574
|
+
report_token_list(pm, around_open, 0, -1, reporter);
|
575
|
+
report_token_list(pm, around_close, 0, -1, reporter);
|
576
|
+
}
|
577
|
+
}
|
578
|
+
|
579
|
+
static VALUE
|
580
|
+
ary_subseq(VALUE ary, int beg, int len)
|
581
|
+
{
|
582
|
+
VALUE argv[2];
|
583
|
+
argv[0] = INT2NUM(beg);
|
584
|
+
argv[1] = INT2NUM(len);
|
585
|
+
return rb_ary_aref(2, argv, ary);
|
586
|
+
}
|
587
|
+
|
588
|
+
static void
|
589
|
+
yield_pair(VALUE pair)
|
590
|
+
{
|
591
|
+
rb_yield(pair);
|
592
|
+
}
|
593
|
+
|
594
|
+
static VALUE
|
595
|
+
pair_before_open_tokens(VALUE pair)
|
596
|
+
{
|
597
|
+
int before_len;
|
598
|
+
VALUE around_open = pair_get_around_open_tokens(pair);
|
599
|
+
before_len = pair_get_before_open_len(pair);
|
600
|
+
return ary_subseq(around_open, 0, before_len);
|
601
|
+
}
|
602
|
+
|
603
|
+
static VALUE
|
604
|
+
pair_around_open(VALUE pair, VALUE index)
|
605
|
+
{
|
606
|
+
int before_len;
|
607
|
+
int i = NUM2INT(index);
|
608
|
+
VALUE around_open = pair_get_around_open_tokens(pair);
|
609
|
+
before_len = pair_get_before_open_len(pair);
|
610
|
+
if (-before_len <= i && i <= pair_get_after_open_len(pair))
|
611
|
+
return rb_ary_entry(around_open, before_len+i);
|
612
|
+
else
|
613
|
+
return Qnil;
|
614
|
+
}
|
615
|
+
|
616
|
+
static VALUE
|
617
|
+
pair_open_token(VALUE pair)
|
618
|
+
{
|
619
|
+
int before_len;
|
620
|
+
VALUE around_open = pair_get_around_open_tokens(pair);
|
621
|
+
before_len = pair_get_before_open_len(pair);
|
622
|
+
return rb_ary_entry(around_open, before_len);
|
623
|
+
}
|
624
|
+
|
625
|
+
static VALUE
|
626
|
+
pair_after_open_tokens(VALUE pair)
|
627
|
+
{
|
628
|
+
int before_len, after_len;
|
629
|
+
VALUE around_open = pair_get_around_open_tokens(pair);
|
630
|
+
before_len = pair_get_before_open_len(pair);
|
631
|
+
after_len = pair_get_after_open_len(pair);
|
632
|
+
return ary_subseq(around_open, before_len+1, after_len);
|
633
|
+
return ary_subseq(around_open, 0, before_len);
|
634
|
+
}
|
635
|
+
|
636
|
+
static VALUE
|
637
|
+
pair_before_close_tokens(VALUE pair)
|
638
|
+
{
|
639
|
+
int before_len;
|
640
|
+
VALUE around_close = pair_get_around_close_tokens(pair);
|
641
|
+
if (around_close == Qnil) return Qnil;
|
642
|
+
before_len = pair_get_before_close_len(pair);
|
643
|
+
return ary_subseq(around_close, 0, before_len);
|
644
|
+
}
|
645
|
+
|
646
|
+
static VALUE
|
647
|
+
pair_around_close(VALUE pair, VALUE index)
|
648
|
+
{
|
649
|
+
int before_len;
|
650
|
+
int i = NUM2INT(index);
|
651
|
+
VALUE around_close = pair_get_around_close_tokens(pair);
|
652
|
+
if (around_close == Qnil) return Qnil;
|
653
|
+
before_len = pair_get_before_close_len(pair);
|
654
|
+
if (-before_len <= i && i <= pair_get_after_close_len(pair))
|
655
|
+
return rb_ary_entry(around_close, before_len+i);
|
656
|
+
else
|
657
|
+
return Qnil;
|
658
|
+
}
|
659
|
+
|
660
|
+
static VALUE
|
661
|
+
pair_close_token(VALUE pair)
|
662
|
+
{
|
663
|
+
int before_len;
|
664
|
+
VALUE around_close = pair_get_around_close_tokens(pair);
|
665
|
+
if (around_close == Qnil) return Qnil;
|
666
|
+
before_len = pair_get_before_close_len(pair);
|
667
|
+
return rb_ary_entry(around_close, before_len);
|
668
|
+
}
|
669
|
+
|
670
|
+
static VALUE
|
671
|
+
pair_after_close_tokens(VALUE pair)
|
672
|
+
{
|
673
|
+
int before_len, after_len;
|
674
|
+
VALUE around_close = pair_get_around_close_tokens(pair);
|
675
|
+
if (around_close == Qnil) return Qnil;
|
676
|
+
before_len = pair_get_before_close_len(pair);
|
677
|
+
after_len = pair_get_after_close_len(pair);
|
678
|
+
return ary_subseq(around_close, before_len+1, after_len);
|
679
|
+
return ary_subseq(around_close, 0, before_len);
|
680
|
+
}
|
681
|
+
|
682
|
+
static VALUE
|
683
|
+
pair_before_open_length(VALUE pair)
|
684
|
+
{
|
685
|
+
return INT2FIX(pair_get_before_open_len(pair));
|
686
|
+
}
|
687
|
+
|
688
|
+
static VALUE
|
689
|
+
pair_after_open_length(VALUE pair)
|
690
|
+
{
|
691
|
+
return INT2FIX(pair_get_after_open_len(pair));
|
692
|
+
}
|
693
|
+
|
694
|
+
static VALUE
|
695
|
+
pair_before_close_length(VALUE pair)
|
696
|
+
{
|
697
|
+
VALUE around_close = pair_get_around_close_tokens(pair);
|
698
|
+
if (around_close == Qnil) return Qnil;
|
699
|
+
return INT2FIX(pair_get_before_close_len(pair));
|
700
|
+
}
|
701
|
+
|
702
|
+
static VALUE
|
703
|
+
pair_after_close_length(VALUE pair)
|
704
|
+
{
|
705
|
+
VALUE around_close = pair_get_around_close_tokens(pair);
|
706
|
+
if (around_close == Qnil) return Qnil;
|
707
|
+
return INT2FIX(pair_get_after_close_len(pair));
|
708
|
+
}
|
709
|
+
|
710
|
+
static VALUE
|
711
|
+
pair_pair_type(VALUE pair)
|
712
|
+
{
|
713
|
+
VALUE def = pair_get_pair_def(pair);
|
714
|
+
return RARRAY_PTR(def)[4];
|
715
|
+
}
|
716
|
+
|
717
|
+
static void
|
718
|
+
check_closed_pairs(pairmatcher_t *pm, VALUE reporter)
|
719
|
+
{
|
720
|
+
int i, j;
|
721
|
+
int after_open_max = pm->after_open_max;
|
722
|
+
int after_close_max = pm->after_close_max;
|
723
|
+
VALUE pair;
|
724
|
+
for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
|
725
|
+
pair = RARRAY_PTR(pm->closed_pairs)[i];
|
726
|
+
if (pair == Qnil)
|
727
|
+
continue;
|
728
|
+
if (pair_get_after_open_len(pair) == after_open_max &&
|
729
|
+
pair_get_after_close_len(pair) == after_close_max) {
|
730
|
+
RARRAY_PTR(pm->closed_pairs)[i] = Qnil;
|
731
|
+
yield_pair(pair);
|
732
|
+
discard_matched_pair(pm, pair, reporter);
|
733
|
+
}
|
734
|
+
}
|
735
|
+
j = 0;
|
736
|
+
for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
|
737
|
+
if (RARRAY_PTR(pm->closed_pairs)[i] != Qnil) {
|
738
|
+
RARRAY_PTR(pm->closed_pairs)[j] = RARRAY_PTR(pm->closed_pairs)[i];
|
739
|
+
j++;
|
740
|
+
}
|
741
|
+
}
|
742
|
+
while (j < RARRAY_LEN(pm->closed_pairs))
|
743
|
+
rb_ary_pop(pm->closed_pairs);
|
744
|
+
}
|
745
|
+
|
746
|
+
static void
|
747
|
+
put_token(pairmatcher_t *pairmatcher, VALUE token, VALUE reporter)
|
748
|
+
{
|
749
|
+
VALUE pair_def, tmp_pair_def;
|
750
|
+
int depth, max_depth, i;
|
751
|
+
add_after_open(pairmatcher, token);
|
752
|
+
add_after_close(pairmatcher, token);
|
753
|
+
check_closed_pairs(pairmatcher, reporter);
|
754
|
+
|
755
|
+
if ((pair_def = open_token_p(pairmatcher, token))) {
|
756
|
+
put_open_token(pairmatcher, token, pair_def);
|
757
|
+
}
|
758
|
+
else {
|
759
|
+
i = 0;
|
760
|
+
max_depth = 0;
|
761
|
+
pair_def = Qfalse;
|
762
|
+
while ((tmp_pair_def = close_token_p(pairmatcher, token, &i)) != Qfalse) {
|
763
|
+
depth = matching_open_depth(pairmatcher, token, tmp_pair_def);
|
764
|
+
if (max_depth <= depth) {
|
765
|
+
pair_def = tmp_pair_def;
|
766
|
+
max_depth = depth;
|
767
|
+
}
|
768
|
+
i++;
|
769
|
+
}
|
770
|
+
if (pair_def != Qfalse) {
|
771
|
+
put_close_token(pairmatcher, token, pair_def, max_depth, reporter);
|
772
|
+
}
|
773
|
+
}
|
774
|
+
add_recent(pairmatcher, reporter, token);
|
775
|
+
}
|
776
|
+
|
777
|
+
static int
|
778
|
+
intertoken_p(pairmatcher_t *pairmatcher, VALUE token_type)
|
779
|
+
{
|
780
|
+
int i;
|
781
|
+
for (i = 0; i < RARRAY_LEN(pairmatcher->intertoken_defs); i++) {
|
782
|
+
VALUE def = RARRAY_PTR(pairmatcher->intertoken_defs)[i];
|
783
|
+
VALUE def_type = RARRAY_PTR(def)[0];
|
784
|
+
//VALUE def_text = RARRAY_PTR(def)[1];
|
785
|
+
if (def_type == token_type) {
|
786
|
+
return 1;
|
787
|
+
}
|
788
|
+
}
|
789
|
+
return 0;
|
790
|
+
}
|
791
|
+
|
792
|
+
static void
|
793
|
+
finish(pairmatcher_t *pm, VALUE reporter)
|
794
|
+
{
|
795
|
+
int i;
|
796
|
+
VALUE pair;
|
797
|
+
for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
|
798
|
+
pair = RARRAY_PTR(pm->closed_pairs)[i];
|
799
|
+
if (pair == Qnil)
|
800
|
+
continue;
|
801
|
+
RARRAY_PTR(pm->closed_pairs)[i] = Qnil;
|
802
|
+
yield_pair(pair);
|
803
|
+
discard_matched_pair(pm, pair, reporter);
|
804
|
+
}
|
805
|
+
while (RARRAY_LEN(pm->pair_stack)) {
|
806
|
+
discard_unmatched_pair(pm, reporter);
|
807
|
+
}
|
808
|
+
report_token_list_now(pm, reporter, pm->recent_tokens, 0, RARRAY_LEN(pm->recent_tokens));
|
809
|
+
}
|
810
|
+
|
811
|
+
static void
|
812
|
+
parse(pairmatcher_t *pm, VALUE tokenizer, VALUE reporter)
|
813
|
+
{
|
814
|
+
VALUE token_info;
|
815
|
+
while ((token_info = get_token(tokenizer)) != Qnil) {
|
816
|
+
VALUE token_type, token_text, token_lineno, token_byteno;
|
817
|
+
VALUE token;
|
818
|
+
Check_Type(token_info, T_ARRAY);
|
819
|
+
if (RARRAY_LEN(token_info) != 8) {
|
820
|
+
rb_raise(rb_eArgError, "unexpected token");
|
821
|
+
}
|
822
|
+
token_type = RARRAY_PTR(token_info)[0];
|
823
|
+
token_text = RARRAY_PTR(token_info)[1];
|
824
|
+
token_lineno = RARRAY_PTR(token_info)[2];
|
825
|
+
token_byteno = RARRAY_PTR(token_info)[4];
|
826
|
+
token = rb_funcall(Fragment, id_new, 4, token_type, token_text, token_lineno, token_byteno);
|
827
|
+
if (intertoken_p(pm, token_type)) {
|
828
|
+
rb_funcall(reporter, id_call, 1, token);
|
829
|
+
}
|
830
|
+
else {
|
831
|
+
put_token(pm, token, reporter);
|
832
|
+
}
|
833
|
+
}
|
834
|
+
finish(pm, reporter);
|
835
|
+
}
|
836
|
+
|
837
|
+
static VALUE
|
838
|
+
pairmatcher_parse(VALUE self, VALUE tokenizer, VALUE reporter)
|
839
|
+
{
|
840
|
+
pairmatcher_t *pairmatcher;
|
841
|
+
GetPM(self, pairmatcher);
|
842
|
+
parse(pairmatcher, tokenizer, reporter);
|
843
|
+
|
844
|
+
return Qnil;
|
845
|
+
}
|
846
|
+
|
847
|
+
void Init_pairmatcher(void)
|
848
|
+
{
|
849
|
+
VALUE LangScan = rb_const_get(rb_cObject, rb_intern("LangScan"));
|
850
|
+
VALUE PairMatcher = rb_define_class_under(LangScan, "PairMatcher", rb_cData);
|
851
|
+
Fragment = rb_const_get(LangScan, rb_intern("Fragment"));
|
852
|
+
rb_global_variable(&Fragment);
|
853
|
+
|
854
|
+
id_get_token = rb_intern("get_token");
|
855
|
+
id_new = rb_intern("new");
|
856
|
+
id_call = rb_intern("call");
|
857
|
+
|
858
|
+
rb_define_alloc_func(PairMatcher, pairmatcher_s_allocate);
|
859
|
+
rb_define_method(PairMatcher, "initialize", pairmatcher_initialize, 4);
|
860
|
+
//rb_define_method(PairMatcher, "initialize_copy", pairmatcher_initialize_copy, 1);
|
861
|
+
rb_define_method(PairMatcher, "define_intertoken_fragment", pairmatcher_define_intertoken_fragment, 2);
|
862
|
+
rb_define_method(PairMatcher, "define_pair", pairmatcher_define_pair, 5);
|
863
|
+
rb_define_method(PairMatcher, "before_open_max", pairmatcher_get_before_open_max, 0);
|
864
|
+
rb_define_method(PairMatcher, "after_open_max", pairmatcher_get_after_open_max, 0);
|
865
|
+
rb_define_method(PairMatcher, "before_close_max", pairmatcher_get_before_close_max, 0);
|
866
|
+
rb_define_method(PairMatcher, "after_close_max", pairmatcher_get_after_close_max, 0);
|
867
|
+
rb_define_method(PairMatcher, "parse", pairmatcher_parse, 2);
|
868
|
+
|
869
|
+
Pair = rb_struct_define("LangScanPair",
|
870
|
+
"pair_def",
|
871
|
+
"before_open_len",
|
872
|
+
"around_open_tokens",
|
873
|
+
"before_close_len",
|
874
|
+
"around_close_tokens",
|
875
|
+
"outer",
|
876
|
+
NULL);
|
877
|
+
rb_define_method(Pair, "before_open_tokens", pair_before_open_tokens, 0);
|
878
|
+
rb_define_method(Pair, "before_open_length", pair_before_open_length, 0);
|
879
|
+
rb_define_method(Pair, "around_open", pair_around_open, 1);
|
880
|
+
rb_define_method(Pair, "open_token", pair_open_token, 0);
|
881
|
+
rb_define_method(Pair, "after_open_tokens", pair_after_open_tokens, 0);
|
882
|
+
rb_define_method(Pair, "after_open_length", pair_after_open_length, 0);
|
883
|
+
rb_define_method(Pair, "before_close_tokens", pair_before_close_tokens, 0);
|
884
|
+
rb_define_method(Pair, "before_close_length", pair_before_close_length, 0);
|
885
|
+
rb_define_method(Pair, "around_close", pair_around_close, 1);
|
886
|
+
rb_define_method(Pair, "close_token", pair_close_token, 0);
|
887
|
+
rb_define_method(Pair, "after_close_tokens", pair_after_close_tokens, 0);
|
888
|
+
rb_define_method(Pair, "after_close_length", pair_after_close_length, 0);
|
889
|
+
rb_define_method(Pair, "pair_type", pair_pair_type, 0);
|
890
|
+
}
|