langscan 1.2-x86-mswin32-60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS.txt +19 -0
- data/History.txt +126 -0
- data/Manifest.txt +167 -0
- data/README.rdoc +91 -0
- data/Rakefile +40 -0
- data/ext/langscan/_make_c.rb +20 -0
- data/ext/langscan/_make_h.rb +30 -0
- data/ext/langscan/_template.c +134 -0
- data/ext/langscan/_template.h +53 -0
- data/ext/langscan/c/c/Makefile +188 -0
- data/ext/langscan/c/c/c.c +134 -0
- data/ext/langscan/c/c/c.h +66 -0
- data/ext/langscan/c/c/ctok.c +4629 -0
- data/ext/langscan/c/c/ctok.l +212 -0
- data/ext/langscan/c/c/extconf.rb +3 -0
- data/ext/langscan/c/c/modulename.txt +1 -0
- data/ext/langscan/c/c/tokenlist.txt +13 -0
- data/ext/langscan/csharp/csharp/Makefile +188 -0
- data/ext/langscan/csharp/csharp/csharp.c +134 -0
- data/ext/langscan/csharp/csharp/csharp.h +65 -0
- data/ext/langscan/csharp/csharp/csharptok.c +2971 -0
- data/ext/langscan/csharp/csharp/csharptok.l +200 -0
- data/ext/langscan/csharp/csharp/extconf.rb +3 -0
- data/ext/langscan/csharp/csharp/modulename.txt +1 -0
- data/ext/langscan/csharp/csharp/tokenlist.txt +12 -0
- data/ext/langscan/d/d/Makefile +188 -0
- data/ext/langscan/d/d/d.c +134 -0
- data/ext/langscan/d/d/d.h +64 -0
- data/ext/langscan/d/d/dtok.c +5468 -0
- data/ext/langscan/d/d/dtok.l +282 -0
- data/ext/langscan/d/d/extconf.rb +3 -0
- data/ext/langscan/d/d/modulename.txt +1 -0
- data/ext/langscan/d/d/tokenlist.txt +11 -0
- data/ext/langscan/elisp/elisp/Makefile +188 -0
- data/ext/langscan/elisp/elisp/elisp.c +134 -0
- data/ext/langscan/elisp/elisp/elisp.h +62 -0
- data/ext/langscan/elisp/elisp/elisptok.c +2108 -0
- data/ext/langscan/elisp/elisp/elisptok.l +151 -0
- data/ext/langscan/elisp/elisp/extconf.rb +3 -0
- data/ext/langscan/elisp/elisp/modulename.txt +1 -0
- data/ext/langscan/elisp/elisp/tokenlist.txt +9 -0
- data/ext/langscan/java/java/Makefile +188 -0
- data/ext/langscan/java/java/extconf.rb +3 -0
- data/ext/langscan/java/java/java.c +134 -0
- data/ext/langscan/java/java/java.h +64 -0
- data/ext/langscan/java/java/javatok.c +2097 -0
- data/ext/langscan/java/java/javatok.l +155 -0
- data/ext/langscan/java/java/modulename.txt +1 -0
- data/ext/langscan/java/java/tokenlist.txt +11 -0
- data/ext/langscan/javascript/javascript/Makefile +188 -0
- data/ext/langscan/javascript/javascript/extconf.rb +3 -0
- data/ext/langscan/javascript/javascript/javascript.c +134 -0
- data/ext/langscan/javascript/javascript/javascript.h +63 -0
- data/ext/langscan/javascript/javascript/javascripttok.c +2058 -0
- data/ext/langscan/javascript/javascript/javascripttok.l +147 -0
- data/ext/langscan/javascript/javascript/modulename.txt +1 -0
- data/ext/langscan/javascript/javascript/tokenlist.txt +10 -0
- data/ext/langscan/pairmatcher/pairmatcher/Makefile +188 -0
- data/ext/langscan/pairmatcher/pairmatcher/extconf.rb +3 -0
- data/ext/langscan/pairmatcher/pairmatcher/pairmatcher.c +890 -0
- data/ext/langscan/php/php/Makefile +188 -0
- data/ext/langscan/php/php/extconf.rb +3 -0
- data/ext/langscan/php/php/modulename.txt +1 -0
- data/ext/langscan/php/php/php.c +134 -0
- data/ext/langscan/php/php/php.h +64 -0
- data/ext/langscan/php/php/phptok.c +2413 -0
- data/ext/langscan/php/php/phptok.l +212 -0
- data/ext/langscan/php/php/tokenlist.txt +11 -0
- data/ext/langscan/post-distclean.rb +21 -0
- data/ext/langscan/pre-config.rb +57 -0
- data/ext/langscan/python/python/Makefile +188 -0
- data/ext/langscan/python/python/extconf.rb +3 -0
- data/ext/langscan/python/python/modulename.txt +1 -0
- data/ext/langscan/python/python/python.c +134 -0
- data/ext/langscan/python/python/python.h +61 -0
- data/ext/langscan/python/python/pythontok.c +2109 -0
- data/ext/langscan/python/python/pythontok.l +155 -0
- data/ext/langscan/python/python/tokenlist.txt +8 -0
- data/ext/langscan/ruby/compat/ripper/Makefile +189 -0
- data/ext/langscan/ruby/compat/ripper/depend +1 -0
- data/ext/langscan/ruby/compat/ripper/extconf.rb +4 -0
- data/ext/langscan/ruby/compat/ripper/include/eventids1.c +251 -0
- data/ext/langscan/ruby/compat/ripper/include/eventids2.c +277 -0
- data/ext/langscan/ruby/compat/ripper/include/lex.c +138 -0
- data/ext/langscan/ruby/compat/ripper/ripper.c +14420 -0
- data/ext/langscan/scheme/scheme/Makefile +188 -0
- data/ext/langscan/scheme/scheme/extconf.rb +3 -0
- data/ext/langscan/scheme/scheme/modulename.txt +1 -0
- data/ext/langscan/scheme/scheme/scheme.c +134 -0
- data/ext/langscan/scheme/scheme/scheme.h +60 -0
- data/ext/langscan/scheme/scheme/schemetok.c +2454 -0
- data/ext/langscan/scheme/scheme/schemetok.l +177 -0
- data/ext/langscan/scheme/scheme/tokenlist.txt +7 -0
- data/ext/langscan/sh/sh/Makefile +188 -0
- data/ext/langscan/sh/sh/extconf.rb +3 -0
- data/ext/langscan/sh/sh/modulename.txt +1 -0
- data/ext/langscan/sh/sh/sh.c +134 -0
- data/ext/langscan/sh/sh/sh.h +61 -0
- data/ext/langscan/sh/sh/shtok.c +2477 -0
- data/ext/langscan/sh/sh/shtok.l +325 -0
- data/ext/langscan/sh/sh/tokenlist.txt +8 -0
- data/lib/langscan.rb +124 -0
- data/lib/langscan/_common.rb +50 -0
- data/lib/langscan/_easyscanner.rb +78 -0
- data/lib/langscan/_pairmatcher.rb +46 -0
- data/lib/langscan/_type.rb +125 -0
- data/lib/langscan/autoconf.rb +51 -0
- data/lib/langscan/automake.rb +51 -0
- data/lib/langscan/brainfuck.rb +48 -0
- data/lib/langscan/c.rb +144 -0
- data/lib/langscan/c/c.so +0 -0
- data/lib/langscan/csharp.rb +101 -0
- data/lib/langscan/csharp/csharp.so +0 -0
- data/lib/langscan/css.rb +109 -0
- data/lib/langscan/d.rb +201 -0
- data/lib/langscan/d/d.so +0 -0
- data/lib/langscan/eiffel.rb +167 -0
- data/lib/langscan/elisp.rb +132 -0
- data/lib/langscan/elisp/elisp.so +0 -0
- data/lib/langscan/io.rb +84 -0
- data/lib/langscan/java.rb +95 -0
- data/lib/langscan/java/java.so +0 -0
- data/lib/langscan/javascript.rb +97 -0
- data/lib/langscan/javascript/javascript.so +0 -0
- data/lib/langscan/lua.rb +116 -0
- data/lib/langscan/ocaml.rb +298 -0
- data/lib/langscan/ocaml/camlexer.ml +28 -0
- data/lib/langscan/ocaml/lexer.mll +230 -0
- data/lib/langscan/ocaml/types.ml +36 -0
- data/lib/langscan/pairmatcher/pairmatcher.so +0 -0
- data/lib/langscan/perl.rb +87 -0
- data/lib/langscan/perl/tokenizer.pl +231 -0
- data/lib/langscan/php.rb +80 -0
- data/lib/langscan/php/php.so +0 -0
- data/lib/langscan/python.rb +101 -0
- data/lib/langscan/python/python.so +0 -0
- data/lib/langscan/rpmspec.rb +71 -0
- data/lib/langscan/ruby.rb +164 -0
- data/lib/langscan/ruby/compat/README +5 -0
- data/lib/langscan/ruby/compat/ripper.rb +4 -0
- data/lib/langscan/ruby/compat/ripper.so +0 -0
- data/lib/langscan/ruby/compat/ripper/core.rb +918 -0
- data/lib/langscan/ruby/compat/ripper/filter.rb +70 -0
- data/lib/langscan/ruby/compat/ripper/lexer.rb +179 -0
- data/lib/langscan/ruby/compat/ripper/sexp.rb +100 -0
- data/lib/langscan/scheme.rb +160 -0
- data/lib/langscan/scheme/scheme.so +0 -0
- data/lib/langscan/sh.rb +116 -0
- data/lib/langscan/sh/sh.so +0 -0
- data/lib/langscan/text.rb +37 -0
- data/metaconfig +2 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/makemanifest.rb +21 -0
- data/setup.rb +1604 -0
- data/tasks/extconf.rake +13 -0
- data/tasks/extconf/langscan.rake +42 -0
- data/test/langscan/brainfuck/test/test_scan.rb +55 -0
- data/test/langscan/c/test/test_scan.rb +216 -0
- data/test/langscan/c/test/test_token.rb +41 -0
- data/test/langscan/csharp/test/test_scan.rb +157 -0
- data/test/langscan/css/test/test_css.rb +79 -0
- data/test/langscan/d/test/test_scan.rb +233 -0
- data/test/langscan/d/test/test_token.rb +205 -0
- data/test/langscan/eiffel/test/test_eiffel.rb +95 -0
- data/test/langscan/elisp/test/test_elisp.rb +177 -0
- data/test/langscan/io/test/test_io.rb +79 -0
- data/test/langscan/java/test/test_java.rb +74 -0
- data/test/langscan/javascript/test/test_javascript.rb +39 -0
- data/test/langscan/lua/test/test_lua.rb +69 -0
- data/test/langscan/ocaml/test/test_ocaml.rb +161 -0
- data/test/langscan/php/test/test_scan.rb +138 -0
- data/test/langscan/python/test/test_scan.rb +105 -0
- data/test/langscan/rpmspec/test/test_rpmspec.rb +51 -0
- data/test/langscan/ruby/test/test_scan.rb +71 -0
- data/test/langscan/scheme/test/test_scan.rb +198 -0
- data/test/test_helper.rb +7 -0
- data/test/test_langscan.rb +123 -0
- metadata +320 -0
@@ -0,0 +1,147 @@
|
|
1
|
+
/*
|
2
|
+
 * javascripttok.l - lex rules for JavaScript
|
3
|
+
*
|
4
|
+
* Copyright (C) 2005 Keisuke Nishida <knishida@open-cobol.org>
|
5
|
+
* All rights reserved.
|
6
|
+
* This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
*
|
8
|
+
* You can redistribute it and/or modify it under the terms of
|
9
|
+
* the GNU General Public License version 2.
|
10
|
+
*/
|
11
|
+
|
12
|
+
%option reentrant
|
13
|
+
%option prefix="langscan_javascript_lex_"
|
14
|
+
%option noyywrap
|
15
|
+
%option nodefault
|
16
|
+
|
17
|
+
slash \/
|
18
|
+
star \*
|
19
|
+
nonstar [^\*]
|
20
|
+
nonslashstar [^\/\*]
|
21
|
+
commentcontent {star}+{nonslashstar}{nonstar}*
|
22
|
+
comment {slash}{star}{nonstar}*{commentcontent}*{star}+{slash}
|
23
|
+
|
24
|
+
%{
|
25
|
+
|
26
|
+
#include "javascript.h"
|
27
|
+
|
28
|
+
#define YY_EXTRA_TYPE langscan_javascript_lex_extra_t *
|
29
|
+
|
30
|
+
#if YY_NULL != 0
|
31
|
+
#error "YY_NULL is not 0."
|
32
|
+
#endif
|
33
|
+
|
34
|
+
#define YY_DECL langscan_javascript_token_t langscan_javascript_lex_lex(yyscan_t yyscanner)
|
35
|
+
|
36
|
+
/*
 * Refill flex's input buffer through the caller-supplied read callback.
 *
 * The first time the callback returns 0 the eof flag is latched and the
 * callback is never invoked again.  On every later refill request the
 * macro reports end of input explicitly (YY_NULL, checked above to be 0)
 * instead of leaving `result` unassigned.
 */
#define YY_INPUT(buf,result,max_size) \
  if (!yyextra->eof) { \
    result = yyextra->user_read(&(yyextra->user_data), (buf), (max_size)); \
    if (result == 0) \
      yyextra->eof = 1; \
  } else { \
    result = YY_NULL; \
  }
|
42
|
+
|
43
|
+
#define UPD update_pos(yyextra, yytext, yyleng)
|
44
|
+
static void update_pos(langscan_javascript_lex_extra_t *, char *, int);
|
45
|
+
|
46
|
+
#define report(token) \
|
47
|
+
do { \
|
48
|
+
yyextra->text = yytext; \
|
49
|
+
yyextra->leng = yyleng; \
|
50
|
+
return langscan_javascript_##token; \
|
51
|
+
} while (0)
|
52
|
+
|
53
|
+
%}
|
54
|
+
|
55
|
+
%%
|
56
|
+
[ \t\f\r]+ { UPD; report(space); }
|
57
|
+
\n { UPD; report(space); }
|
58
|
+
"//".* { UPD; report(comment); }
|
59
|
+
{comment} { UPD; report(comment); }
|
60
|
+
\"([^\\\"]|\\.)*\" { UPD; report(string); }
|
61
|
+
[A-Za-z_][0-9A-Za-z_]* { UPD; report(ident); }
|
62
|
+
. { UPD; report(punct); }
|
63
|
+
|
64
|
+
%%
|
65
|
+
|
66
|
+
static void update_pos(
|
67
|
+
langscan_javascript_lex_extra_t *extra,
|
68
|
+
char *text,
|
69
|
+
int leng)
|
70
|
+
{
|
71
|
+
int i, j;
|
72
|
+
extra->beg_byteno = extra->end_byteno;
|
73
|
+
extra->beg_lineno = extra->end_lineno;
|
74
|
+
extra->beg_columnno = extra->end_columnno;
|
75
|
+
j = 0;
|
76
|
+
for (i = 0; i < leng; i++) {
|
77
|
+
if (text[i] == '\n') {
|
78
|
+
extra->end_lineno++;
|
79
|
+
j = i + 1;
|
80
|
+
extra->end_columnno = 0;
|
81
|
+
}
|
82
|
+
}
|
83
|
+
extra->end_columnno += leng - j;
|
84
|
+
extra->end_byteno += leng;
|
85
|
+
}
|
86
|
+
|
87
|
+
langscan_javascript_tokenizer_t *langscan_javascript_make_tokenizer(
|
88
|
+
size_t (*user_read)(void **user_data_p, char *buf, size_t maxlen),
|
89
|
+
void *user_data)
|
90
|
+
{
|
91
|
+
langscan_javascript_tokenizer_t *tokenizer;
|
92
|
+
langscan_javascript_lex_extra_t *extra;
|
93
|
+
tokenizer = (langscan_javascript_tokenizer_t *)malloc(sizeof(langscan_javascript_tokenizer_t));
|
94
|
+
if (tokenizer == NULL)
|
95
|
+
return NULL;
|
96
|
+
extra = (langscan_javascript_lex_extra_t *)malloc(sizeof(langscan_javascript_lex_extra_t));
|
97
|
+
if (extra == NULL)
|
98
|
+
return NULL;
|
99
|
+
extra->user_read = user_read;
|
100
|
+
extra->user_data = user_data;
|
101
|
+
extra->beg_lineno = 1;
|
102
|
+
extra->beg_columnno = 0;
|
103
|
+
extra->beg_byteno = 0;
|
104
|
+
extra->end_lineno = 1;
|
105
|
+
extra->end_columnno = 0;
|
106
|
+
extra->end_byteno = 0;
|
107
|
+
extra->eof = 0;
|
108
|
+
tokenizer->extra = extra;
|
109
|
+
langscan_javascript_lex_lex_init(&tokenizer->scanner);
|
110
|
+
langscan_javascript_lex_set_extra(extra, tokenizer->scanner);
|
111
|
+
return tokenizer;
|
112
|
+
}
|
113
|
+
|
114
|
+
/* Run the scanner once and hand back the token kind it reports. */
langscan_javascript_token_t langscan_javascript_get_token(langscan_javascript_tokenizer_t *tokenizer)
{
  langscan_javascript_token_t next;

  next = langscan_javascript_lex_lex(tokenizer->scanner);
  return next;
}
|
118
|
+
|
119
|
+
/*
 * Destroy a tokenizer created by langscan_javascript_make_tokenizer():
 * frees the scanner's extra data, the flex scanner itself and the
 * tokenizer struct.  Passing NULL is a no-op, mirroring free(), because
 * the constructor returns NULL on failure.
 */
void langscan_javascript_free_tokenizer(langscan_javascript_tokenizer_t *tokenizer)
{
  langscan_javascript_lex_extra_t *extra;

  if (tokenizer == NULL)
    return;

  extra = langscan_javascript_lex_get_extra(tokenizer->scanner);
  free((void *)extra);
  langscan_javascript_lex_lex_destroy(tokenizer->scanner);
  free((void *)tokenizer);
}
|
126
|
+
|
127
|
+
/* Return the read callback that was registered when the tokenizer was made. */
user_read_t langscan_javascript_tokenizer_get_user_read(langscan_javascript_tokenizer_t *tokenizer)
{
  user_read_t reader = tokenizer->extra->user_read;

  return reader;
}
|
131
|
+
|
132
|
+
/* Return the opaque user_data value currently stored in the scanner state. */
void *langscan_javascript_tokenizer_get_user_data(langscan_javascript_tokenizer_t *tokenizer)
{
  void *data = tokenizer->extra->user_data;

  return data;
}
|
136
|
+
|
137
|
+
/*
 * Map a token kind to its printable name.
 *
 * Index 0 is "*eof*"; the remaining entries are generated from
 * LANGSCAN_JAVASCRIPT_TOKEN_LIST by stringifying each token name.
 *
 * Returns NULL when `token` does not index the table (negative or too
 * large) instead of reading past the end of the array.
 */
const char *langscan_javascript_token_name(langscan_javascript_token_t token)
{
  static const char *const token_names[] = {
    "*eof*",
#define LANGSCAN_JAVASCRIPT_TOKEN(name) #name,
    LANGSCAN_JAVASCRIPT_TOKEN_LIST
#undef LANGSCAN_JAVASCRIPT_TOKEN
  };
  const size_t count = sizeof(token_names) / sizeof(token_names[0]);

  /* The cast folds negative values into "too large". */
  if ((size_t)token >= count)
    return NULL;
  return token_names[token];
}
|
@@ -0,0 +1 @@
|
|
1
|
+
JavaScript
|
@@ -0,0 +1,188 @@
|
|
1
|
+
|
2
|
+
SHELL = /bin/sh
|
3
|
+
|
4
|
+
#### Start of system configuration section. ####
|
5
|
+
|
6
|
+
srcdir = .
|
7
|
+
topdir = C:/usr/ruby/lib/ruby/1.8/i386-mswin32
|
8
|
+
hdrdir = $(topdir)
|
9
|
+
VPATH = $(srcdir);$(topdir);$(hdrdir)
|
10
|
+
|
11
|
+
DESTDIR = C:
|
12
|
+
exec_prefix = $(prefix)
|
13
|
+
prefix = $(DESTDIR)/usr/ruby
|
14
|
+
sharedstatedir = $(DESTDIR)/etc
|
15
|
+
mandir = $(prefix)/man
|
16
|
+
oldincludedir = $(DESTDIR)/usr/include
|
17
|
+
bindir = $(exec_prefix)/bin
|
18
|
+
libexecdir = $(exec_prefix)/libexec
|
19
|
+
sitedir = $(prefix)/lib/ruby/site_ruby
|
20
|
+
vendorarchdir = $(vendorlibdir)/$(sitearch)
|
21
|
+
includedir = $(prefix)/include
|
22
|
+
infodir = $(prefix)/info
|
23
|
+
vendorlibdir = $(vendordir)/$(ruby_version)
|
24
|
+
sysconfdir = $(prefix)/etc
|
25
|
+
libdir = $(exec_prefix)/lib
|
26
|
+
sbindir = $(exec_prefix)/sbin
|
27
|
+
rubylibdir = $(libdir)/ruby/$(ruby_version)
|
28
|
+
vendordir = $(prefix)/lib/ruby/vendor_ruby
|
29
|
+
archdir = $(rubylibdir)/$(arch)
|
30
|
+
sitearchdir = $(sitelibdir)/$(sitearch)
|
31
|
+
datadir = $(prefix)/share
|
32
|
+
localstatedir = $(DESTDIR)/var
|
33
|
+
sitelibdir = $(sitedir)/$(ruby_version)
|
34
|
+
|
35
|
+
CC = cl -nologo
|
36
|
+
LIBRUBY = $(RUBY_SO_NAME).lib
|
37
|
+
LIBRUBY_A = $(RUBY_SO_NAME)-static.lib
|
38
|
+
LIBRUBYARG_SHARED = $(LIBRUBY)
|
39
|
+
LIBRUBYARG_STATIC = $(LIBRUBY_A)
|
40
|
+
|
41
|
+
RUBY_EXTCONF_H =
|
42
|
+
CFLAGS = -MT -Zi -O2b2xg- -G6
|
43
|
+
INCFLAGS = -I. -I$(topdir) -I$(hdrdir) -I$(srcdir)
|
44
|
+
DEFS =
|
45
|
+
CPPFLAGS =
|
46
|
+
CXXFLAGS = $(CFLAGS)
|
47
|
+
ldflags =
|
48
|
+
dldflags = -link -incremental:no -debug -opt:ref -opt:icf -dll $(LIBPATH)
|
49
|
+
archflag =
|
50
|
+
DLDFLAGS = $(ldflags) $(dldflags) $(archflag)
|
51
|
+
LDSHARED = cl -nologo -LD
|
52
|
+
AR = lib -nologo
|
53
|
+
EXEEXT = .exe
|
54
|
+
|
55
|
+
RUBY_INSTALL_NAME = ruby
|
56
|
+
RUBY_SO_NAME = msvcrt-ruby18
|
57
|
+
arch = i386-mswin32
|
58
|
+
sitearch = i386-msvcrt
|
59
|
+
ruby_version = 1.8
|
60
|
+
ruby = C:/usr/ruby/bin/ruby
|
61
|
+
RUBY = $(ruby:/=\)
|
62
|
+
RM = $(RUBY) -run -e rm -- -f
|
63
|
+
MAKEDIRS = @$(RUBY) -run -e mkdir -- -p
|
64
|
+
INSTALL = @$(RUBY) -run -e install -- -vp
|
65
|
+
INSTALL_PROG = $(INSTALL) -m 0755
|
66
|
+
INSTALL_DATA = $(INSTALL) -m 0644
|
67
|
+
COPY = copy > nul
|
68
|
+
|
69
|
+
#### End of system configuration section. ####
|
70
|
+
|
71
|
+
preload =
|
72
|
+
|
73
|
+
libpath = . $(libdir)
|
74
|
+
LIBPATH = -libpath:"." -libpath:"$(libdir)"
|
75
|
+
DEFFILE = $(TARGET)-$(arch).def
|
76
|
+
|
77
|
+
CLEANFILES = mkmf.log
|
78
|
+
DISTCLEANFILES = vc*.pdb $(DEFFILE)
|
79
|
+
|
80
|
+
extout =
|
81
|
+
extout_prefix =
|
82
|
+
target_prefix = /langscan/pairmatcher
|
83
|
+
LOCAL_LIBS =
|
84
|
+
LIBS = $(LIBRUBYARG_SHARED) oldnames.lib user32.lib advapi32.lib shell32.lib ws2_32.lib
|
85
|
+
SRCS = pairmatcher.c
|
86
|
+
OBJS = pairmatcher.obj
|
87
|
+
TARGET = pairmatcher
|
88
|
+
DLLIB = $(TARGET).so
|
89
|
+
EXTSTATIC =
|
90
|
+
STATIC_LIB =
|
91
|
+
|
92
|
+
BINDIR = $(bindir)
|
93
|
+
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
94
|
+
RUBYLIBDIR = C:/devel/gem/git/langscan/pkg/langscan-1.2.gem.build/lib$(target_prefix)
|
95
|
+
RUBYARCHDIR = C:/devel/gem/git/langscan/pkg/langscan-1.2.gem.build/lib$(target_prefix)
|
96
|
+
|
97
|
+
TARGET_SO = $(DLLIB)
|
98
|
+
CLEANLIBS = $(TARGET).so $(TARGET).il? $(TARGET).tds $(TARGET).map
|
99
|
+
CLEANOBJS = *.obj *.lib *.s[ol] *.pdb *.exp *.bak
|
100
|
+
|
101
|
+
all: $(DLLIB)
|
102
|
+
static: $(STATIC_LIB)
|
103
|
+
|
104
|
+
clean:
|
105
|
+
@-$(RM) $(CLEANLIBS:/=\) $(CLEANOBJS:/=\) $(CLEANFILES:/=\)
|
106
|
+
|
107
|
+
distclean: clean
|
108
|
+
@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
109
|
+
@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES:/=\)
|
110
|
+
|
111
|
+
realclean: distclean
|
112
|
+
install: install-so install-rb
|
113
|
+
|
114
|
+
install-so: $(RUBYARCHDIR)
|
115
|
+
install-so: $(RUBYARCHDIR)/$(DLLIB)
|
116
|
+
$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
|
117
|
+
$(INSTALL_PROG) $(DLLIB:/=\) $(RUBYARCHDIR:/=\)
|
118
|
+
install-rb: pre-install-rb install-rb-default
|
119
|
+
install-rb-default: pre-install-rb-default
|
120
|
+
pre-install-rb: Makefile
|
121
|
+
pre-install-rb-default: Makefile
|
122
|
+
$(RUBYARCHDIR):
|
123
|
+
$(MAKEDIRS) $@
|
124
|
+
|
125
|
+
site-install: site-install-so site-install-rb
|
126
|
+
site-install-so: install-so
|
127
|
+
site-install-rb: install-rb
|
128
|
+
|
129
|
+
.SUFFIXES: .c .m .cc .cxx .cpp .obj
|
130
|
+
|
131
|
+
{$(hdrdir)}.cc{}.obj:
|
132
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
133
|
+
|
134
|
+
{$(topdir)}.cc{}.obj:
|
135
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
136
|
+
|
137
|
+
{$(srcdir)}.cc{}.obj:
|
138
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
139
|
+
|
140
|
+
.cc.obj:
|
141
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
142
|
+
|
143
|
+
{$(hdrdir)}.cxx{}.obj:
|
144
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
145
|
+
|
146
|
+
{$(topdir)}.cxx{}.obj:
|
147
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
148
|
+
|
149
|
+
{$(srcdir)}.cxx{}.obj:
|
150
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
151
|
+
|
152
|
+
.cxx.obj:
|
153
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
154
|
+
|
155
|
+
{$(hdrdir)}.cpp{}.obj:
|
156
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
157
|
+
|
158
|
+
{$(topdir)}.cpp{}.obj:
|
159
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
160
|
+
|
161
|
+
{$(srcdir)}.cpp{}.obj:
|
162
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
163
|
+
|
164
|
+
.cpp.obj:
|
165
|
+
$(CXX) $(INCFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -Tp$(<:\=/)
|
166
|
+
|
167
|
+
{$(hdrdir)}.c{}.obj:
|
168
|
+
$(CC) $(INCFLAGS) $(CFLAGS) $(CPPFLAGS) -c -Tc$(<:\=/)
|
169
|
+
|
170
|
+
{$(topdir)}.c{}.obj:
|
171
|
+
$(CC) $(INCFLAGS) $(CFLAGS) $(CPPFLAGS) -c -Tc$(<:\=/)
|
172
|
+
|
173
|
+
{$(srcdir)}.c{}.obj:
|
174
|
+
$(CC) $(INCFLAGS) $(CFLAGS) $(CPPFLAGS) -c -Tc$(<:\=/)
|
175
|
+
|
176
|
+
.c.obj:
|
177
|
+
$(CC) $(INCFLAGS) $(CFLAGS) $(CPPFLAGS) -c -Tc$(<:\=/)
|
178
|
+
|
179
|
+
$(DLLIB): $(DEFFILE) $(OBJS) Makefile
|
180
|
+
@-$(RM) $@
|
181
|
+
$(LDSHARED) -Fe$(@) $(OBJS) $(LIBS) $(LOCAL_LIBS) $(DLDFLAGS) -implib:$(*F:.so=)-$(arch).lib -pdb:$(*F:.so=)-$(arch).pdb -def:$(DEFFILE)
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
$(DEFFILE):
|
186
|
+
$(RUBY) -e "puts 'EXPORTS', 'Init_$(TARGET)'" > $@
|
187
|
+
|
188
|
+
$(OBJS): {.;$(VPATH)}ruby.h {.;$(VPATH)}defines.h
|
@@ -0,0 +1,890 @@
|
|
1
|
+
/*
|
2
|
+
* pairmatcher.c - a pair matching parser
|
3
|
+
*
|
4
|
+
* Copyright (C) 2005 Akira Tanaka <akr@m17n.org>
|
5
|
+
* All rights reserved.
|
6
|
+
* This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
*
|
8
|
+
* You can redistribute it and/or modify it under the terms of
|
9
|
+
* the GNU General Public License version 2.
|
10
|
+
*/
|
11
|
+
|
12
|
+
#include <ruby.h>
|
13
|
+
|
14
|
+
static ID id_get_token, id_new, id_call;
|
15
|
+
static VALUE Fragment;
|
16
|
+
|
17
|
+
#ifndef RSTRUCT_PTR
|
18
|
+
# define RSTRUCT_PTR(st) (RSTRUCT(st)->ptr)
|
19
|
+
#endif
|
20
|
+
#ifndef RSTRUCT_LEN
|
21
|
+
# define RSTRUCT_LEN(st) (RSTRUCT(st)->len)
|
22
|
+
#endif
|
23
|
+
|
24
|
+
#ifndef RARRAY_PTR
|
25
|
+
# define RARRAY_PTR(str) (RARRAY(str)->ptr)
|
26
|
+
#endif
|
27
|
+
#ifndef RARRAY_LEN
|
28
|
+
# define RARRAY_LEN(str) (RARRAY(str)->len)
|
29
|
+
#endif
|
30
|
+
|
31
|
+
#ifdef SYMBOL_P
|
32
|
+
# define Check_Symbol(val) do { if (!SYMBOL_P(val)) { Check_Type(val, T_SYMBOL); } } while (0)
|
33
|
+
#else
|
34
|
+
# define Check_Symbol(val) Check_Type(val, T_SYMBOL)
|
35
|
+
#endif
|
36
|
+
|
37
|
+
static VALUE
|
38
|
+
fragment_type(VALUE fragment)
|
39
|
+
{
|
40
|
+
VALUE val;
|
41
|
+
Check_Type(fragment, T_STRUCT);
|
42
|
+
if (RBASIC(fragment)->klass != Fragment) {
|
43
|
+
rb_raise(rb_eTypeError, "not fragment");
|
44
|
+
}
|
45
|
+
val = RSTRUCT_PTR(fragment)[0];
|
46
|
+
Check_Symbol(val);
|
47
|
+
return val;
|
48
|
+
}
|
49
|
+
|
50
|
+
static VALUE
|
51
|
+
fragment_text(VALUE fragment)
|
52
|
+
{
|
53
|
+
VALUE val;
|
54
|
+
Check_Type(fragment, T_STRUCT);
|
55
|
+
if (RBASIC(fragment)->klass != Fragment) {
|
56
|
+
rb_raise(rb_eTypeError, "not fragment");
|
57
|
+
}
|
58
|
+
val = RSTRUCT_PTR(fragment)[1];
|
59
|
+
StringValue(val);
|
60
|
+
return val;
|
61
|
+
}
|
62
|
+
|
63
|
+
static int
|
64
|
+
fragment_byteno(VALUE fragment)
|
65
|
+
{
|
66
|
+
VALUE val;
|
67
|
+
Check_Type(fragment, T_STRUCT);
|
68
|
+
if (RBASIC(fragment)->klass != Fragment) {
|
69
|
+
rb_raise(rb_eTypeError, "not fragment");
|
70
|
+
}
|
71
|
+
val = RSTRUCT_PTR(fragment)[3];
|
72
|
+
return NUM2INT(val);
|
73
|
+
}
|
74
|
+
|
75
|
+
typedef struct {
|
76
|
+
unsigned char before_open_max;
|
77
|
+
unsigned char after_open_max;
|
78
|
+
unsigned char before_close_max;
|
79
|
+
unsigned char after_close_max;
|
80
|
+
VALUE pair_defs;
|
81
|
+
VALUE intertoken_defs;
|
82
|
+
VALUE recent_tokens;
|
83
|
+
VALUE pair_stack;
|
84
|
+
VALUE closed_pairs;
|
85
|
+
} pairmatcher_t;
|
86
|
+
|
87
|
+
/*
 * GC mark callback: keep every Ruby array owned by the matcher alive.
 * The data pointer is NULL between allocation and initialize, in which
 * case there is nothing to mark.
 */
static void pairmatcher_mark(pairmatcher_t *pairmatcher)
{
  if (pairmatcher != NULL) {
    rb_gc_mark(pairmatcher->pair_defs);
    rb_gc_mark(pairmatcher->intertoken_defs);
    rb_gc_mark(pairmatcher->recent_tokens);
    rb_gc_mark(pairmatcher->pair_stack);
    rb_gc_mark(pairmatcher->closed_pairs);
  }
}
|
97
|
+
|
98
|
+
/*
 * GC free callback for the matcher struct.
 *
 * The struct is obtained with Ruby's ALLOC() in pairmatcher_initialize,
 * so it is released with the matching xfree() rather than libc free().
 * A NULL pointer (object never initialised) is ignored.
 */
static void pairmatcher_free(pairmatcher_t *pairmatcher)
{
  if (pairmatcher == NULL)
    return;
  xfree((void *)pairmatcher);
}
|
104
|
+
|
105
|
+
/*
 * Allocator: wrap a NULL data pointer in a T_DATA object of `klass`.
 * The real struct is attached later by pairmatcher_initialize.
 */
static VALUE pairmatcher_s_allocate(VALUE klass)
{
  VALUE obj;

  obj = Data_Wrap_Struct(klass, pairmatcher_mark, pairmatcher_free, NULL);
  return obj;
}
|
109
|
+
|
110
|
+
/*
 * Convert a Ruby integer to a context-size limit.  The limits are stored
 * in unsigned char fields, so anything outside 0..255 is rejected with
 * ArgumentError instead of being silently truncated.
 */
static unsigned char
pairmatcher_context_limit(VALUE num)
{
  int n = NUM2INT(num);

  if (n < 0 || 255 < n) {
    rb_raise(rb_eArgError, "context size out of range (0..255): %d", n);
  }
  return (unsigned char)n;
}

/*
 * initialize(before_open_max, after_open_max, before_close_max, after_close_max)
 *
 * Attaches a freshly allocated pairmatcher_t to self.
 * Raises ArgumentError when called twice on the same object or when a
 * limit does not fit in 0..255; raises whatever NUM2INT raises for
 * non-integer arguments.  Returns self.
 */
static VALUE pairmatcher_initialize(
  VALUE self,
  VALUE before_open_max,
  VALUE after_open_max,
  VALUE before_close_max,
  VALUE after_close_max)
{
  pairmatcher_t *pairmatcher;
  unsigned char before_open, after_open, before_close, after_close;

  Data_Get_Struct(self, pairmatcher_t, pairmatcher);
  if (pairmatcher != NULL) { rb_raise(rb_eArgError, "called twice"); }

  /*
   * Convert the arguments first: these calls may raise, and must not
   * leave a half-filled struct attached to self.
   */
  before_open = pairmatcher_context_limit(before_open_max);
  after_open = pairmatcher_context_limit(after_open_max);
  before_close = pairmatcher_context_limit(before_close_max);
  after_close = pairmatcher_context_limit(after_close_max);

  pairmatcher = ALLOC(pairmatcher_t);
  pairmatcher->before_open_max = before_open;
  pairmatcher->after_open_max = after_open;
  pairmatcher->before_close_max = before_close;
  pairmatcher->after_close_max = after_close;
  /*
   * Every VALUE slot must hold a valid object before the struct becomes
   * reachable: the rb_ary_new() calls below may trigger GC, which runs
   * pairmatcher_mark() over all five fields.
   */
  pairmatcher->pair_defs = Qnil;
  pairmatcher->intertoken_defs = Qnil;
  pairmatcher->recent_tokens = Qnil;
  pairmatcher->pair_stack = Qnil;
  pairmatcher->closed_pairs = Qnil;
  DATA_PTR(self) = pairmatcher;

  pairmatcher->pair_defs = rb_ary_new();
  pairmatcher->intertoken_defs = rb_ary_new();
  pairmatcher->recent_tokens = rb_ary_new();
  pairmatcher->pair_stack = rb_ary_new();
  pairmatcher->closed_pairs = rb_ary_new();
  return self;
}
|
146
|
+
|
147
|
+
#define GetPM(obj, var) \
|
148
|
+
do { \
|
149
|
+
Data_Get_Struct((obj), pairmatcher_t, (var)); \
|
150
|
+
if ((var) == NULL) { rb_raise(rb_eArgError, "not initialized"); } \
|
151
|
+
} while(0)
|
152
|
+
|
153
|
+
static VALUE
|
154
|
+
pairmatcher_get_before_open_max(VALUE self)
|
155
|
+
{
|
156
|
+
pairmatcher_t *pairmatcher;
|
157
|
+
GetPM(self, pairmatcher);
|
158
|
+
if (pairmatcher == NULL) { rb_raise(rb_eArgError, "not initialized"); }
|
159
|
+
return INT2NUM(pairmatcher->before_open_max);
|
160
|
+
}
|
161
|
+
|
162
|
+
static VALUE
|
163
|
+
pairmatcher_get_after_open_max(VALUE self)
|
164
|
+
{
|
165
|
+
pairmatcher_t *pairmatcher;
|
166
|
+
GetPM(self, pairmatcher);
|
167
|
+
return INT2NUM(pairmatcher->after_open_max);
|
168
|
+
}
|
169
|
+
|
170
|
+
static VALUE
|
171
|
+
pairmatcher_get_before_close_max(VALUE self)
|
172
|
+
{
|
173
|
+
pairmatcher_t *pairmatcher;
|
174
|
+
GetPM(self, pairmatcher);
|
175
|
+
return INT2NUM(pairmatcher->before_close_max);
|
176
|
+
}
|
177
|
+
|
178
|
+
static VALUE
|
179
|
+
pairmatcher_get_after_close_max(VALUE self)
|
180
|
+
{
|
181
|
+
pairmatcher_t *pairmatcher;
|
182
|
+
GetPM(self, pairmatcher);
|
183
|
+
return INT2NUM(pairmatcher->after_close_max);
|
184
|
+
}
|
185
|
+
|
186
|
+
static VALUE
|
187
|
+
pairmatcher_define_pair(VALUE self, VALUE pair_type, VALUE open_type, VALUE open_text, VALUE close_type, VALUE close_text)
|
188
|
+
{
|
189
|
+
pairmatcher_t *pairmatcher;
|
190
|
+
VALUE def;
|
191
|
+
Check_Symbol(open_type);
|
192
|
+
if (open_text != Qnil) {
|
193
|
+
StringValue(open_text);
|
194
|
+
open_text = rb_str_new4(open_text);
|
195
|
+
}
|
196
|
+
Check_Symbol(close_type);
|
197
|
+
if (close_text != Qnil) {
|
198
|
+
StringValue(close_text);
|
199
|
+
close_text = rb_str_new4(close_text);
|
200
|
+
}
|
201
|
+
|
202
|
+
def = rb_ary_new3(5, open_type, open_text, close_type, close_text, pair_type);
|
203
|
+
//RBASIC(def)->klass = 0;
|
204
|
+
|
205
|
+
GetPM(self, pairmatcher);
|
206
|
+
rb_ary_push(pairmatcher->pair_defs, def);
|
207
|
+
|
208
|
+
return Qnil;
|
209
|
+
}
|
210
|
+
|
211
|
+
static VALUE
|
212
|
+
pairmatcher_define_intertoken_fragment(VALUE self, VALUE type, VALUE text)
|
213
|
+
{
|
214
|
+
pairmatcher_t *pairmatcher;
|
215
|
+
VALUE def;
|
216
|
+
Check_Symbol(type);
|
217
|
+
if (text != Qnil) {
|
218
|
+
StringValue(text);
|
219
|
+
text = rb_str_new4(text);
|
220
|
+
}
|
221
|
+
|
222
|
+
def = rb_ary_new3(2, type, text);
|
223
|
+
//RBASIC(def)->klass = 0;
|
224
|
+
|
225
|
+
GetPM(self, pairmatcher);
|
226
|
+
rb_ary_push(pairmatcher->intertoken_defs, def);
|
227
|
+
|
228
|
+
return Qnil;
|
229
|
+
}
|
230
|
+
|
231
|
+
static VALUE
|
232
|
+
get_token(VALUE tokenizer)
|
233
|
+
{
|
234
|
+
return rb_funcall(tokenizer, id_get_token, 0);
|
235
|
+
}
|
236
|
+
|
237
|
+
static VALUE
|
238
|
+
open_token_p(pairmatcher_t *pairmatcher, VALUE token)
|
239
|
+
{
|
240
|
+
int i;
|
241
|
+
VALUE type = fragment_type(token);
|
242
|
+
VALUE text = fragment_text(token);
|
243
|
+
for (i = 0; i < RARRAY_LEN(pairmatcher->pair_defs); i++) {
|
244
|
+
VALUE def = RARRAY_PTR(pairmatcher->pair_defs)[i];
|
245
|
+
VALUE open_type = RARRAY_PTR(def)[0];
|
246
|
+
VALUE open_text = RARRAY_PTR(def)[1];
|
247
|
+
if (open_type == Qnil || open_type == type) {
|
248
|
+
if (open_text == Qnil || rb_str_cmp(open_text, text) == 0) {
|
249
|
+
return def;
|
250
|
+
}
|
251
|
+
}
|
252
|
+
}
|
253
|
+
return Qfalse;
|
254
|
+
}
|
255
|
+
|
256
|
+
static VALUE
|
257
|
+
close_token_p(pairmatcher_t *pairmatcher, VALUE token, int *i)
|
258
|
+
{
|
259
|
+
VALUE type = fragment_type(token);
|
260
|
+
VALUE text = fragment_text(token);
|
261
|
+
for (; *i < RARRAY_LEN(pairmatcher->pair_defs); (*i)++) {
|
262
|
+
VALUE def = RARRAY_PTR(pairmatcher->pair_defs)[*i];
|
263
|
+
VALUE close_type = RARRAY_PTR(def)[2];
|
264
|
+
VALUE close_text = RARRAY_PTR(def)[3];
|
265
|
+
if (close_type == Qnil || close_type == type) {
|
266
|
+
if (close_text == Qnil || rb_str_cmp(close_text, text) == 0) {
|
267
|
+
return def;
|
268
|
+
}
|
269
|
+
}
|
270
|
+
}
|
271
|
+
return Qfalse;
|
272
|
+
}
|
273
|
+
|
274
|
+
#define pair_get_pair_def(pair) (RSTRUCT_PTR(pair)[0])
|
275
|
+
#define pair_get_before_open_len(pair) FIX2INT(RSTRUCT_PTR(pair)[1])
|
276
|
+
#define pair_get_around_open_tokens(pair) (RSTRUCT_PTR(pair)[2])
|
277
|
+
#define pair_get_before_close_len(pair) FIX2INT(RSTRUCT_PTR(pair)[3])
|
278
|
+
#define pair_get_around_close_tokens(pair) (RSTRUCT_PTR(pair)[4])
|
279
|
+
#define pair_get_outer(pair) (RSTRUCT_PTR(pair)[5])
|
280
|
+
#define pair_set_pair_def(pair, val) (RSTRUCT_PTR(pair)[0] = (val))
|
281
|
+
#define pair_set_before_open_len(pair, len) (RSTRUCT_PTR(pair)[1] = INT2FIX(len))
|
282
|
+
#define pair_set_around_open_tokens(pair, val) (RSTRUCT_PTR(pair)[2] = (val))
|
283
|
+
#define pair_set_before_close_len(pair, len) (RSTRUCT_PTR(pair)[3] = INT2FIX(len))
|
284
|
+
#define pair_set_around_close_tokens(pair, val) (RSTRUCT_PTR(pair)[4] = (val))
|
285
|
+
|
286
|
+
#define pair_get_after_open_len(pair) (RARRAY_LEN(pair_get_around_open_tokens(pair))-pair_get_before_open_len(pair)-1)
|
287
|
+
#define pair_get_after_close_len(pair) (RARRAY_LEN(pair_get_around_close_tokens(pair))-pair_get_before_close_len(pair)-1)
|
288
|
+
|
289
|
+
static VALUE Pair;
|
290
|
+
|
291
|
+
static VALUE
|
292
|
+
make_pair(VALUE pair_def, int before_open_len, VALUE around_open_tokens, VALUE outer)
|
293
|
+
{
|
294
|
+
VALUE pair = rb_struct_new(Pair,
|
295
|
+
pair_def,
|
296
|
+
INT2FIX(before_open_len),
|
297
|
+
around_open_tokens,
|
298
|
+
Qnil,
|
299
|
+
Qnil,
|
300
|
+
outer);
|
301
|
+
return pair;
|
302
|
+
}
|
303
|
+
|
304
|
+
static int
|
305
|
+
concat_recent_tokens(pairmatcher_t *pm, int max, VALUE ary)
|
306
|
+
{
|
307
|
+
int i;
|
308
|
+
if (RARRAY_LEN(pm->recent_tokens) <= max)
|
309
|
+
max = RARRAY_LEN(pm->recent_tokens);
|
310
|
+
for (i = 0; i < max; i++) {
|
311
|
+
rb_ary_push(ary, RARRAY_PTR(pm->recent_tokens)[RARRAY_LEN(pm->recent_tokens)-max+i]);
|
312
|
+
}
|
313
|
+
return max;
|
314
|
+
}
|
315
|
+
|
316
|
+
static void
|
317
|
+
put_open_token(pairmatcher_t *pm, VALUE open_token, VALUE pair_def)
|
318
|
+
{
|
319
|
+
int before_open_len, stack_len;
|
320
|
+
VALUE pair;
|
321
|
+
VALUE around_open_tokens;
|
322
|
+
around_open_tokens = rb_ary_new2(pm->before_open_max+1+pm->after_open_max);
|
323
|
+
before_open_len = concat_recent_tokens(pm, pm->before_open_max, around_open_tokens);
|
324
|
+
rb_ary_push(around_open_tokens, open_token);
|
325
|
+
stack_len = RARRAY_LEN(pm->pair_stack);
|
326
|
+
pair = make_pair(pair_def, before_open_len, around_open_tokens,
|
327
|
+
stack_len ? RARRAY_PTR(pm->pair_stack)[stack_len-1] : Qnil);
|
328
|
+
rb_ary_push(pm->pair_stack, pair);
|
329
|
+
}
|
330
|
+
|
331
|
+
static int
|
332
|
+
matching_open_depth(pairmatcher_t *pm, VALUE open_token, VALUE pair_def)
|
333
|
+
{
|
334
|
+
int i;
|
335
|
+
for (i = RARRAY_LEN(pm->pair_stack) - 1; 0 <= i; i--) {
|
336
|
+
if (pair_get_pair_def(RARRAY_PTR(pm->pair_stack)[i]) == pair_def) {
|
337
|
+
return i;
|
338
|
+
}
|
339
|
+
}
|
340
|
+
return -1;
|
341
|
+
}
|
342
|
+
|
343
|
+
static void
|
344
|
+
report_token_list_now(pairmatcher_t *pm, VALUE reporter, VALUE token_list, int beg, int len)
|
345
|
+
{
|
346
|
+
while (len) {
|
347
|
+
rb_funcall(reporter, id_call, 1, RARRAY_PTR(token_list)[beg]);
|
348
|
+
beg++;
|
349
|
+
len--;
|
350
|
+
}
|
351
|
+
}
|
352
|
+
|
353
|
+
static void
|
354
|
+
report_token_list_open_pair(pairmatcher_t *pm, VALUE reporter, VALUE token_list, int beg, int len)
|
355
|
+
{
|
356
|
+
int i;
|
357
|
+
for (i = RARRAY_LEN(pm->pair_stack) - 1; 0 <= i; i--) {
|
358
|
+
VALUE pair = RARRAY_PTR(pm->pair_stack)[i];
|
359
|
+
VALUE around_open = pair_get_around_open_tokens(pair);
|
360
|
+
int first_byteno = fragment_byteno(RARRAY_PTR(around_open)[0]);
|
361
|
+
int last_byteno = fragment_byteno(RARRAY_PTR(around_open)[RARRAY_LEN(around_open)-1]);
|
362
|
+
if (last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg])) {
|
363
|
+
report_token_list_now(pm, reporter, token_list, beg, len);
|
364
|
+
return;
|
365
|
+
}
|
366
|
+
/* last_byteno >= fragment_byteno(RARRAY_PTR(token_list)[beg]) */
|
367
|
+
if (fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) < first_byteno)
|
368
|
+
continue;
|
369
|
+
/*
|
370
|
+
* fragment_byteno(RARRAY_PTR(token_list)[beg]) <= last_byteno
|
371
|
+
* first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1])
|
372
|
+
*/
|
373
|
+
if (last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg+len-1])) {
|
374
|
+
int beg2 = beg+len-1;
|
375
|
+
int len2 = 1;
|
376
|
+
while (beg <= beg2 && last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg2-1])) {
|
377
|
+
beg2--;
|
378
|
+
len2++;
|
379
|
+
}
|
380
|
+
report_token_list_now(pm, reporter, token_list, beg2, len2);
|
381
|
+
len -= len2;
|
382
|
+
}
|
383
|
+
/*
|
384
|
+
* first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) <= last_byteno
|
385
|
+
* fragment_byteno(RARRAY_PTR(token_list)[beg]) <= last_byteno
|
386
|
+
*/
|
387
|
+
while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1])) {
|
388
|
+
len--;
|
389
|
+
}
|
390
|
+
if (len == 0)
|
391
|
+
return;
|
392
|
+
}
|
393
|
+
report_token_list_now(pm, reporter, token_list, beg, len);
|
394
|
+
}
|
395
|
+
|
396
|
+
static void
|
397
|
+
report_token_list_rec_closed_pair(pairmatcher_t *pm, VALUE reporter, VALUE token_list, int beg, int len, int closed_pairs_index)
|
398
|
+
{
|
399
|
+
VALUE pair;
|
400
|
+
VALUE around_close, around_open, first, last;
|
401
|
+
int first_byteno, last_byteno;
|
402
|
+
if (RARRAY_LEN(pm->closed_pairs) <= closed_pairs_index) {
|
403
|
+
report_token_list_open_pair(pm, reporter, token_list, beg, len);
|
404
|
+
return;
|
405
|
+
}
|
406
|
+
pair = RARRAY_PTR(pm->closed_pairs)[closed_pairs_index];
|
407
|
+
if (pair == Qnil) {
|
408
|
+
report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
|
409
|
+
return;
|
410
|
+
}
|
411
|
+
around_close = pair_get_around_close_tokens(pair);
|
412
|
+
first = RARRAY_PTR(around_close)[0];
|
413
|
+
first_byteno = fragment_byteno(first);
|
414
|
+
while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]))
|
415
|
+
len--;
|
416
|
+
if (len == 0)
|
417
|
+
return;
|
418
|
+
around_open = pair_get_around_open_tokens(pair);
|
419
|
+
first = RARRAY_PTR(around_open)[0];
|
420
|
+
first_byteno = fragment_byteno(first);
|
421
|
+
last = RARRAY_PTR(around_open)[RARRAY_LEN(around_open)-1];
|
422
|
+
last_byteno = fragment_byteno(last);
|
423
|
+
if (last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg]) ||
|
424
|
+
fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) < first_byteno) {
|
425
|
+
report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
|
426
|
+
}
|
427
|
+
else if (first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg])) {
|
428
|
+
while (0 < len && fragment_byteno(RARRAY_PTR(token_list)[beg]) <= last_byteno) {
|
429
|
+
beg++;
|
430
|
+
len--;
|
431
|
+
}
|
432
|
+
if (len)
|
433
|
+
report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
|
434
|
+
}
|
435
|
+
else if (fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]) <= last_byteno) {
|
436
|
+
while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]))
|
437
|
+
len--;
|
438
|
+
if (len)
|
439
|
+
report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, closed_pairs_index+1);
|
440
|
+
}
|
441
|
+
else {
|
442
|
+
int beg1, len1;
|
443
|
+
int beg2, len2;
|
444
|
+
beg1 = beg;
|
445
|
+
len1 = 1;
|
446
|
+
while (len1 < len && fragment_byteno(RARRAY_PTR(token_list)[beg1+len1]) < first_byteno)
|
447
|
+
len1++;
|
448
|
+
beg2 = beg + len - 1;
|
449
|
+
len2 = 1;
|
450
|
+
while (beg <= beg2-1 && last_byteno < fragment_byteno(RARRAY_PTR(token_list)[beg2-1])) {
|
451
|
+
beg2--;
|
452
|
+
len2++;
|
453
|
+
}
|
454
|
+
report_token_list_rec_closed_pair(pm, reporter, token_list, beg1, len1, closed_pairs_index+1);
|
455
|
+
report_token_list_rec_closed_pair(pm, reporter, token_list, beg2, len2, closed_pairs_index+1);
|
456
|
+
}
|
457
|
+
}
|
458
|
+
|
459
|
+
static void
|
460
|
+
report_token_list(pairmatcher_t *pm, VALUE token_list, int beg, int len, VALUE reporter)
|
461
|
+
{
|
462
|
+
if (len < 0)
|
463
|
+
len = RARRAY_LEN(token_list) - beg;
|
464
|
+
if (len == 0)
|
465
|
+
return;
|
466
|
+
if (RARRAY_LEN(pm->recent_tokens) != 0) {
|
467
|
+
int first_byteno = fragment_byteno(RARRAY_PTR(pm->recent_tokens)[0]);
|
468
|
+
if (first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[0]))
|
469
|
+
return;
|
470
|
+
while (0 < len && first_byteno <= fragment_byteno(RARRAY_PTR(token_list)[beg+len-1]))
|
471
|
+
len--;
|
472
|
+
}
|
473
|
+
report_token_list_rec_closed_pair(pm, reporter, token_list, beg, len, 0);
|
474
|
+
}
|
475
|
+
|
476
|
+
static void
|
477
|
+
discard_unmatched_pair(pairmatcher_t *pm, VALUE reporter)
|
478
|
+
{
|
479
|
+
VALUE pair = rb_ary_pop(pm->pair_stack);
|
480
|
+
report_token_list(pm, pair_get_around_open_tokens(pair), 0, -1, reporter);
|
481
|
+
}
|
482
|
+
|
483
|
+
static void
|
484
|
+
put_close_token(pairmatcher_t *pm, VALUE close_token, VALUE pair_def, int depth, VALUE reporter)
|
485
|
+
{
|
486
|
+
VALUE pair, around_close_tokens;
|
487
|
+
int before_close_len;
|
488
|
+
while (depth+1 < RARRAY_LEN(pm->pair_stack)) {
|
489
|
+
discard_unmatched_pair(pm, reporter);
|
490
|
+
}
|
491
|
+
pair = rb_ary_pop(pm->pair_stack);
|
492
|
+
around_close_tokens = rb_ary_new2(pm->before_close_max+1+pm->after_close_max);
|
493
|
+
before_close_len = concat_recent_tokens(pm, pm->before_close_max, around_close_tokens);
|
494
|
+
rb_ary_push(around_close_tokens, close_token);
|
495
|
+
pair_set_before_close_len(pair, before_close_len);
|
496
|
+
pair_set_around_close_tokens(pair, around_close_tokens);
|
497
|
+
rb_ary_push(pm->closed_pairs, pair);
|
498
|
+
}
|
499
|
+
|
500
|
+
static void
|
501
|
+
add_recent(pairmatcher_t *pm, VALUE reporter, VALUE token)
|
502
|
+
{
|
503
|
+
int max = pm->before_open_max;
|
504
|
+
if (max < pm->before_close_max)
|
505
|
+
max = pm->before_close_max;
|
506
|
+
if (max == 0)
|
507
|
+
return;
|
508
|
+
if (RARRAY_LEN(pm->recent_tokens) < max) {
|
509
|
+
rb_ary_push(pm->recent_tokens, token);
|
510
|
+
}
|
511
|
+
else {
|
512
|
+
VALUE val = RARRAY_PTR(pm->recent_tokens)[0];
|
513
|
+
MEMMOVE(RARRAY_PTR(pm->recent_tokens),
|
514
|
+
RARRAY_PTR(pm->recent_tokens)+1,
|
515
|
+
VALUE, max-1);
|
516
|
+
RARRAY_PTR(pm->recent_tokens)[max-1] = token;
|
517
|
+
report_token_list(pm, rb_ary_new3(1, val), 0, -1, reporter);
|
518
|
+
}
|
519
|
+
}
|
520
|
+
|
521
|
+
static void
|
522
|
+
add_after_open(pairmatcher_t *pm, VALUE token)
|
523
|
+
{
|
524
|
+
int i;
|
525
|
+
int max = pm->after_open_max;
|
526
|
+
VALUE pair;
|
527
|
+
for (i = RARRAY_LEN(pm->pair_stack)-1; 0 <= i; i--) {
|
528
|
+
pair = RARRAY_PTR(pm->pair_stack)[i];
|
529
|
+
if (max <= pair_get_after_open_len(pair))
|
530
|
+
break;
|
531
|
+
rb_ary_push(pair_get_around_open_tokens(pair), token);
|
532
|
+
}
|
533
|
+
for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
|
534
|
+
pair = RARRAY_PTR(pm->closed_pairs)[i];
|
535
|
+
if (pair == Qnil)
|
536
|
+
continue;
|
537
|
+
if (pair_get_after_open_len(pair) < max)
|
538
|
+
rb_ary_push(pair_get_around_open_tokens(pair), token);
|
539
|
+
}
|
540
|
+
}
|
541
|
+
|
542
|
+
static void
|
543
|
+
add_after_close(pairmatcher_t *pm, VALUE token)
|
544
|
+
{
|
545
|
+
int i;
|
546
|
+
int max = pm->after_close_max;
|
547
|
+
VALUE pair;
|
548
|
+
for (i = RARRAY_LEN(pm->closed_pairs) - 1; 0 <= i; i--) {
|
549
|
+
pair = RARRAY_PTR(pm->closed_pairs)[i];
|
550
|
+
if (pair == Qnil)
|
551
|
+
continue;
|
552
|
+
if (max <= pair_get_after_close_len(pair))
|
553
|
+
break;
|
554
|
+
rb_ary_push(pair_get_around_close_tokens(pair), token);
|
555
|
+
}
|
556
|
+
}
|
557
|
+
|
558
|
+
static void
|
559
|
+
discard_matched_pair(pairmatcher_t *pm, VALUE pair, VALUE reporter)
|
560
|
+
{
|
561
|
+
VALUE around_open = pair_get_around_open_tokens(pair);
|
562
|
+
VALUE around_close = pair_get_around_close_tokens(pair);
|
563
|
+
if (fragment_byteno(RARRAY_PTR(around_close)[0]) <=
|
564
|
+
fragment_byteno(RARRAY_PTR(around_open)[RARRAY_LEN(around_open)-1])) {
|
565
|
+
int len1 = RARRAY_LEN(around_open);
|
566
|
+
while (0 < len1 && fragment_byteno(RARRAY_PTR(around_close)[0]) <= fragment_byteno(RARRAY_PTR(around_open)[len1-1]))
|
567
|
+
len1--;
|
568
|
+
if (len1) {
|
569
|
+
report_token_list(pm, around_open, 0, len1, reporter);
|
570
|
+
}
|
571
|
+
report_token_list(pm, around_close, 0, -1, reporter);
|
572
|
+
}
|
573
|
+
else {
|
574
|
+
report_token_list(pm, around_open, 0, -1, reporter);
|
575
|
+
report_token_list(pm, around_close, 0, -1, reporter);
|
576
|
+
}
|
577
|
+
}
|
578
|
+
|
579
|
+
static VALUE
|
580
|
+
ary_subseq(VALUE ary, int beg, int len)
|
581
|
+
{
|
582
|
+
VALUE argv[2];
|
583
|
+
argv[0] = INT2NUM(beg);
|
584
|
+
argv[1] = INT2NUM(len);
|
585
|
+
return rb_ary_aref(2, argv, ary);
|
586
|
+
}
|
587
|
+
|
588
|
+
static void
|
589
|
+
yield_pair(VALUE pair)
|
590
|
+
{
|
591
|
+
rb_yield(pair);
|
592
|
+
}
|
593
|
+
|
594
|
+
static VALUE
|
595
|
+
pair_before_open_tokens(VALUE pair)
|
596
|
+
{
|
597
|
+
int before_len;
|
598
|
+
VALUE around_open = pair_get_around_open_tokens(pair);
|
599
|
+
before_len = pair_get_before_open_len(pair);
|
600
|
+
return ary_subseq(around_open, 0, before_len);
|
601
|
+
}
|
602
|
+
|
603
|
+
static VALUE
|
604
|
+
pair_around_open(VALUE pair, VALUE index)
|
605
|
+
{
|
606
|
+
int before_len;
|
607
|
+
int i = NUM2INT(index);
|
608
|
+
VALUE around_open = pair_get_around_open_tokens(pair);
|
609
|
+
before_len = pair_get_before_open_len(pair);
|
610
|
+
if (-before_len <= i && i <= pair_get_after_open_len(pair))
|
611
|
+
return rb_ary_entry(around_open, before_len+i);
|
612
|
+
else
|
613
|
+
return Qnil;
|
614
|
+
}
|
615
|
+
|
616
|
+
static VALUE
|
617
|
+
pair_open_token(VALUE pair)
|
618
|
+
{
|
619
|
+
int before_len;
|
620
|
+
VALUE around_open = pair_get_around_open_tokens(pair);
|
621
|
+
before_len = pair_get_before_open_len(pair);
|
622
|
+
return rb_ary_entry(around_open, before_len);
|
623
|
+
}
|
624
|
+
|
625
|
+
static VALUE
|
626
|
+
pair_after_open_tokens(VALUE pair)
|
627
|
+
{
|
628
|
+
int before_len, after_len;
|
629
|
+
VALUE around_open = pair_get_around_open_tokens(pair);
|
630
|
+
before_len = pair_get_before_open_len(pair);
|
631
|
+
after_len = pair_get_after_open_len(pair);
|
632
|
+
return ary_subseq(around_open, before_len+1, after_len);
|
633
|
+
return ary_subseq(around_open, 0, before_len);
|
634
|
+
}
|
635
|
+
|
636
|
+
static VALUE
|
637
|
+
pair_before_close_tokens(VALUE pair)
|
638
|
+
{
|
639
|
+
int before_len;
|
640
|
+
VALUE around_close = pair_get_around_close_tokens(pair);
|
641
|
+
if (around_close == Qnil) return Qnil;
|
642
|
+
before_len = pair_get_before_close_len(pair);
|
643
|
+
return ary_subseq(around_close, 0, before_len);
|
644
|
+
}
|
645
|
+
|
646
|
+
static VALUE
|
647
|
+
pair_around_close(VALUE pair, VALUE index)
|
648
|
+
{
|
649
|
+
int before_len;
|
650
|
+
int i = NUM2INT(index);
|
651
|
+
VALUE around_close = pair_get_around_close_tokens(pair);
|
652
|
+
if (around_close == Qnil) return Qnil;
|
653
|
+
before_len = pair_get_before_close_len(pair);
|
654
|
+
if (-before_len <= i && i <= pair_get_after_close_len(pair))
|
655
|
+
return rb_ary_entry(around_close, before_len+i);
|
656
|
+
else
|
657
|
+
return Qnil;
|
658
|
+
}
|
659
|
+
|
660
|
+
static VALUE
|
661
|
+
pair_close_token(VALUE pair)
|
662
|
+
{
|
663
|
+
int before_len;
|
664
|
+
VALUE around_close = pair_get_around_close_tokens(pair);
|
665
|
+
if (around_close == Qnil) return Qnil;
|
666
|
+
before_len = pair_get_before_close_len(pair);
|
667
|
+
return rb_ary_entry(around_close, before_len);
|
668
|
+
}
|
669
|
+
|
670
|
+
static VALUE
|
671
|
+
pair_after_close_tokens(VALUE pair)
|
672
|
+
{
|
673
|
+
int before_len, after_len;
|
674
|
+
VALUE around_close = pair_get_around_close_tokens(pair);
|
675
|
+
if (around_close == Qnil) return Qnil;
|
676
|
+
before_len = pair_get_before_close_len(pair);
|
677
|
+
after_len = pair_get_after_close_len(pair);
|
678
|
+
return ary_subseq(around_close, before_len+1, after_len);
|
679
|
+
return ary_subseq(around_close, 0, before_len);
|
680
|
+
}
|
681
|
+
|
682
|
+
static VALUE
|
683
|
+
pair_before_open_length(VALUE pair)
|
684
|
+
{
|
685
|
+
return INT2FIX(pair_get_before_open_len(pair));
|
686
|
+
}
|
687
|
+
|
688
|
+
static VALUE
|
689
|
+
pair_after_open_length(VALUE pair)
|
690
|
+
{
|
691
|
+
return INT2FIX(pair_get_after_open_len(pair));
|
692
|
+
}
|
693
|
+
|
694
|
+
static VALUE
|
695
|
+
pair_before_close_length(VALUE pair)
|
696
|
+
{
|
697
|
+
VALUE around_close = pair_get_around_close_tokens(pair);
|
698
|
+
if (around_close == Qnil) return Qnil;
|
699
|
+
return INT2FIX(pair_get_before_close_len(pair));
|
700
|
+
}
|
701
|
+
|
702
|
+
static VALUE
|
703
|
+
pair_after_close_length(VALUE pair)
|
704
|
+
{
|
705
|
+
VALUE around_close = pair_get_around_close_tokens(pair);
|
706
|
+
if (around_close == Qnil) return Qnil;
|
707
|
+
return INT2FIX(pair_get_after_close_len(pair));
|
708
|
+
}
|
709
|
+
|
710
|
+
static VALUE
|
711
|
+
pair_pair_type(VALUE pair)
|
712
|
+
{
|
713
|
+
VALUE def = pair_get_pair_def(pair);
|
714
|
+
return RARRAY_PTR(def)[4];
|
715
|
+
}
|
716
|
+
|
717
|
+
static void
|
718
|
+
check_closed_pairs(pairmatcher_t *pm, VALUE reporter)
|
719
|
+
{
|
720
|
+
int i, j;
|
721
|
+
int after_open_max = pm->after_open_max;
|
722
|
+
int after_close_max = pm->after_close_max;
|
723
|
+
VALUE pair;
|
724
|
+
for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
|
725
|
+
pair = RARRAY_PTR(pm->closed_pairs)[i];
|
726
|
+
if (pair == Qnil)
|
727
|
+
continue;
|
728
|
+
if (pair_get_after_open_len(pair) == after_open_max &&
|
729
|
+
pair_get_after_close_len(pair) == after_close_max) {
|
730
|
+
RARRAY_PTR(pm->closed_pairs)[i] = Qnil;
|
731
|
+
yield_pair(pair);
|
732
|
+
discard_matched_pair(pm, pair, reporter);
|
733
|
+
}
|
734
|
+
}
|
735
|
+
j = 0;
|
736
|
+
for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
|
737
|
+
if (RARRAY_PTR(pm->closed_pairs)[i] != Qnil) {
|
738
|
+
RARRAY_PTR(pm->closed_pairs)[j] = RARRAY_PTR(pm->closed_pairs)[i];
|
739
|
+
j++;
|
740
|
+
}
|
741
|
+
}
|
742
|
+
while (j < RARRAY_LEN(pm->closed_pairs))
|
743
|
+
rb_ary_pop(pm->closed_pairs);
|
744
|
+
}
|
745
|
+
|
746
|
+
static void
|
747
|
+
put_token(pairmatcher_t *pairmatcher, VALUE token, VALUE reporter)
|
748
|
+
{
|
749
|
+
VALUE pair_def, tmp_pair_def;
|
750
|
+
int depth, max_depth, i;
|
751
|
+
add_after_open(pairmatcher, token);
|
752
|
+
add_after_close(pairmatcher, token);
|
753
|
+
check_closed_pairs(pairmatcher, reporter);
|
754
|
+
|
755
|
+
if ((pair_def = open_token_p(pairmatcher, token))) {
|
756
|
+
put_open_token(pairmatcher, token, pair_def);
|
757
|
+
}
|
758
|
+
else {
|
759
|
+
i = 0;
|
760
|
+
max_depth = 0;
|
761
|
+
pair_def = Qfalse;
|
762
|
+
while ((tmp_pair_def = close_token_p(pairmatcher, token, &i)) != Qfalse) {
|
763
|
+
depth = matching_open_depth(pairmatcher, token, tmp_pair_def);
|
764
|
+
if (max_depth <= depth) {
|
765
|
+
pair_def = tmp_pair_def;
|
766
|
+
max_depth = depth;
|
767
|
+
}
|
768
|
+
i++;
|
769
|
+
}
|
770
|
+
if (pair_def != Qfalse) {
|
771
|
+
put_close_token(pairmatcher, token, pair_def, max_depth, reporter);
|
772
|
+
}
|
773
|
+
}
|
774
|
+
add_recent(pairmatcher, reporter, token);
|
775
|
+
}
|
776
|
+
|
777
|
+
static int
|
778
|
+
intertoken_p(pairmatcher_t *pairmatcher, VALUE token_type)
|
779
|
+
{
|
780
|
+
int i;
|
781
|
+
for (i = 0; i < RARRAY_LEN(pairmatcher->intertoken_defs); i++) {
|
782
|
+
VALUE def = RARRAY_PTR(pairmatcher->intertoken_defs)[i];
|
783
|
+
VALUE def_type = RARRAY_PTR(def)[0];
|
784
|
+
//VALUE def_text = RARRAY_PTR(def)[1];
|
785
|
+
if (def_type == token_type) {
|
786
|
+
return 1;
|
787
|
+
}
|
788
|
+
}
|
789
|
+
return 0;
|
790
|
+
}
|
791
|
+
|
792
|
+
static void
|
793
|
+
finish(pairmatcher_t *pm, VALUE reporter)
|
794
|
+
{
|
795
|
+
int i;
|
796
|
+
VALUE pair;
|
797
|
+
for (i = 0; i < RARRAY_LEN(pm->closed_pairs); i++) {
|
798
|
+
pair = RARRAY_PTR(pm->closed_pairs)[i];
|
799
|
+
if (pair == Qnil)
|
800
|
+
continue;
|
801
|
+
RARRAY_PTR(pm->closed_pairs)[i] = Qnil;
|
802
|
+
yield_pair(pair);
|
803
|
+
discard_matched_pair(pm, pair, reporter);
|
804
|
+
}
|
805
|
+
while (RARRAY_LEN(pm->pair_stack)) {
|
806
|
+
discard_unmatched_pair(pm, reporter);
|
807
|
+
}
|
808
|
+
report_token_list_now(pm, reporter, pm->recent_tokens, 0, RARRAY_LEN(pm->recent_tokens));
|
809
|
+
}
|
810
|
+
|
811
|
+
static void
|
812
|
+
parse(pairmatcher_t *pm, VALUE tokenizer, VALUE reporter)
|
813
|
+
{
|
814
|
+
VALUE token_info;
|
815
|
+
while ((token_info = get_token(tokenizer)) != Qnil) {
|
816
|
+
VALUE token_type, token_text, token_lineno, token_byteno;
|
817
|
+
VALUE token;
|
818
|
+
Check_Type(token_info, T_ARRAY);
|
819
|
+
if (RARRAY_LEN(token_info) != 8) {
|
820
|
+
rb_raise(rb_eArgError, "unexpected token");
|
821
|
+
}
|
822
|
+
token_type = RARRAY_PTR(token_info)[0];
|
823
|
+
token_text = RARRAY_PTR(token_info)[1];
|
824
|
+
token_lineno = RARRAY_PTR(token_info)[2];
|
825
|
+
token_byteno = RARRAY_PTR(token_info)[4];
|
826
|
+
token = rb_funcall(Fragment, id_new, 4, token_type, token_text, token_lineno, token_byteno);
|
827
|
+
if (intertoken_p(pm, token_type)) {
|
828
|
+
rb_funcall(reporter, id_call, 1, token);
|
829
|
+
}
|
830
|
+
else {
|
831
|
+
put_token(pm, token, reporter);
|
832
|
+
}
|
833
|
+
}
|
834
|
+
finish(pm, reporter);
|
835
|
+
}
|
836
|
+
|
837
|
+
static VALUE
|
838
|
+
pairmatcher_parse(VALUE self, VALUE tokenizer, VALUE reporter)
|
839
|
+
{
|
840
|
+
pairmatcher_t *pairmatcher;
|
841
|
+
GetPM(self, pairmatcher);
|
842
|
+
parse(pairmatcher, tokenizer, reporter);
|
843
|
+
|
844
|
+
return Qnil;
|
845
|
+
}
|
846
|
+
|
847
|
+
void Init_pairmatcher(void)
|
848
|
+
{
|
849
|
+
VALUE LangScan = rb_const_get(rb_cObject, rb_intern("LangScan"));
|
850
|
+
VALUE PairMatcher = rb_define_class_under(LangScan, "PairMatcher", rb_cData);
|
851
|
+
Fragment = rb_const_get(LangScan, rb_intern("Fragment"));
|
852
|
+
rb_global_variable(&Fragment);
|
853
|
+
|
854
|
+
id_get_token = rb_intern("get_token");
|
855
|
+
id_new = rb_intern("new");
|
856
|
+
id_call = rb_intern("call");
|
857
|
+
|
858
|
+
rb_define_alloc_func(PairMatcher, pairmatcher_s_allocate);
|
859
|
+
rb_define_method(PairMatcher, "initialize", pairmatcher_initialize, 4);
|
860
|
+
//rb_define_method(PairMatcher, "initialize_copy", pairmatcher_initialize_copy, 1);
|
861
|
+
rb_define_method(PairMatcher, "define_intertoken_fragment", pairmatcher_define_intertoken_fragment, 2);
|
862
|
+
rb_define_method(PairMatcher, "define_pair", pairmatcher_define_pair, 5);
|
863
|
+
rb_define_method(PairMatcher, "before_open_max", pairmatcher_get_before_open_max, 0);
|
864
|
+
rb_define_method(PairMatcher, "after_open_max", pairmatcher_get_after_open_max, 0);
|
865
|
+
rb_define_method(PairMatcher, "before_close_max", pairmatcher_get_before_close_max, 0);
|
866
|
+
rb_define_method(PairMatcher, "after_close_max", pairmatcher_get_after_close_max, 0);
|
867
|
+
rb_define_method(PairMatcher, "parse", pairmatcher_parse, 2);
|
868
|
+
|
869
|
+
Pair = rb_struct_define("LangScanPair",
|
870
|
+
"pair_def",
|
871
|
+
"before_open_len",
|
872
|
+
"around_open_tokens",
|
873
|
+
"before_close_len",
|
874
|
+
"around_close_tokens",
|
875
|
+
"outer",
|
876
|
+
NULL);
|
877
|
+
rb_define_method(Pair, "before_open_tokens", pair_before_open_tokens, 0);
|
878
|
+
rb_define_method(Pair, "before_open_length", pair_before_open_length, 0);
|
879
|
+
rb_define_method(Pair, "around_open", pair_around_open, 1);
|
880
|
+
rb_define_method(Pair, "open_token", pair_open_token, 0);
|
881
|
+
rb_define_method(Pair, "after_open_tokens", pair_after_open_tokens, 0);
|
882
|
+
rb_define_method(Pair, "after_open_length", pair_after_open_length, 0);
|
883
|
+
rb_define_method(Pair, "before_close_tokens", pair_before_close_tokens, 0);
|
884
|
+
rb_define_method(Pair, "before_close_length", pair_before_close_length, 0);
|
885
|
+
rb_define_method(Pair, "around_close", pair_around_close, 1);
|
886
|
+
rb_define_method(Pair, "close_token", pair_close_token, 0);
|
887
|
+
rb_define_method(Pair, "after_close_tokens", pair_after_close_tokens, 0);
|
888
|
+
rb_define_method(Pair, "after_close_length", pair_after_close_length, 0);
|
889
|
+
rb_define_method(Pair, "pair_type", pair_pair_type, 0);
|
890
|
+
}
|