breakout_parser 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +16 -0
- data/LICENSE +39 -0
- data/Rakefile +99 -0
- data/VERSION +1 -0
- data/ext/breakout_parser/_make.sh +7 -0
- data/ext/breakout_parser/extconf.rb +16 -0
- data/ext/breakout_parser/lex.yy.c +2449 -0
- data/ext/breakout_parser/lex.yy.o +0 -0
- data/ext/breakout_parser/main.c +32 -0
- data/ext/breakout_parser/main.o +0 -0
- data/ext/breakout_parser/make_win32.bat +15 -0
- data/ext/breakout_parser/parser +0 -0
- data/ext/breakout_parser/parser.l +162 -0
- data/ext/breakout_parser/parser.tab.h +98 -0
- data/ext/breakout_parser/parser.tab.o +0 -0
- data/ext/breakout_parser/parser.y +357 -0
- data/ext/breakout_parser/ruby_ext.c +55 -0
- data/ext/breakout_parser/ruby_ext.o +0 -0
- data/ext/breakout_parser/test.rb +3 -0
- data/ext/breakout_parser/yywrap.c +3 -0
- data/ext/breakout_parser/yywrap.o +0 -0
- data/lib/breakout_parser.rb +6 -0
- data/spec/parser_examples_spec.rb +101 -0
- data/spec/parser_spec.rb +549 -0
- metadata +87 -0
Binary file
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#ifndef RUBY_VERSION
|
2
|
+
|
3
|
+
// this file is compiled in ONLY if RUBY_VERSION is NOT defined (compiling w/o ruby)
|
4
|
+
|
5
|
+
#include <string.h>
|
6
|
+
#include <stdio.h>
|
7
|
+
#include <stdlib.h>
|
8
|
+
|
9
|
+
extern char*buf, *bufptr;
|
10
|
+
extern char*in_buf;
|
11
|
+
extern size_t in_buf_len, bufsize;
|
12
|
+
|
13
|
+
main(){
|
14
|
+
char *p;
|
15
|
+
// *buf = 0;
|
16
|
+
// strcpy(in_buf,"h1. aaa");
|
17
|
+
// in_buf_len = strlen(in_buf);
|
18
|
+
|
19
|
+
bufsize = 1 + in_buf_len + in_buf_len/3; // reserve 30% of in_buf size
|
20
|
+
if(bufsize<0x100) bufsize = 0x100;
|
21
|
+
|
22
|
+
buf = malloc(bufsize);
|
23
|
+
bufptr = buf;
|
24
|
+
|
25
|
+
yyparse();
|
26
|
+
|
27
|
+
p = buf;
|
28
|
+
while(strncmp(p,"<br />",6) == 0) p += 6;
|
29
|
+
printf("%s\n",p);
|
30
|
+
}
|
31
|
+
|
32
|
+
#endif // ifndef RUBY_VERSION
|
Binary file
|
Binary file
|
@@ -0,0 +1,162 @@
|
|
1
|
+
%{
|
2
|
+
#include <stdlib.h>
|
3
|
+
#include "parser.tab.h"
|
4
|
+
|
5
|
+
// HACK for parsing end of bold/italic text at EOF
|
6
|
+
// see http://flex.sourceforge.net/manual/How-can-I-match-text-only-at-the-end-of-a-file_003f.html#How-can-I-match-text-only-at-the-end-of-a-file_003f
|
7
|
+
int was_fake_br = 0;
|
8
|
+
|
9
|
+
extern char* in_buf;
|
10
|
+
extern char* in_pos;
|
11
|
+
extern size_t in_buf_len;
|
12
|
+
|
13
|
+
#undef YY_BUF_SIZE
|
14
|
+
#define YY_BUF_SIZE 1300000
|
15
|
+
#undef YY_READ_BUF_SIZE
|
16
|
+
#define YY_READ_BUF_SIZE 1200000
|
17
|
+
|
18
|
+
#define YY_NO_UNISTD_H
|
19
|
+
|
20
|
+
|
21
|
+
/* Smaller of a and b. Each argument may be evaluated twice, so pass only
   side-effect-free expressions. The whole expansion is parenthesized so the
   macro can be used inside larger expressions. */
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
|
22
|
+
|
23
|
+
#ifdef RUBY_VERSION
|
24
|
+
|
25
|
+
#define YY_INPUT(buf,result,max_size) \
|
26
|
+
{ \
|
27
|
+
size_t unread_size = in_buf_len - (in_pos - in_buf); \
|
28
|
+
if( unread_size > 0 ){ \
|
29
|
+
size_t blocksize = MIN(max_size, unread_size); \
|
30
|
+
was_fake_br = 0; \
|
31
|
+
memcpy(buf,in_pos,blocksize); \
|
32
|
+
in_pos += blocksize; \
|
33
|
+
result = blocksize; \
|
34
|
+
} else if ( !was_fake_br ){ \
|
35
|
+
memcpy(buf,"\n\xff",2); result = 2; was_fake_br = 1; \
|
36
|
+
} else { \
|
37
|
+
result = 0; \
|
38
|
+
} \
|
39
|
+
}
|
40
|
+
|
41
|
+
#else
|
42
|
+
|
43
|
+
#define YY_INPUT(buf,result,max_size) \
|
44
|
+
if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
|
45
|
+
{ \
|
46
|
+
int c = '*'; \
|
47
|
+
int n; \
|
48
|
+
for ( n = 0; n < max_size && \
|
49
|
+
(c = getc( yyin )) != EOF && c != '\n'; ++n ) \
|
50
|
+
buf[n] = (char) c; \
|
51
|
+
if ( c == '\n' ) \
|
52
|
+
buf[n++] = (char) c; \
|
53
|
+
if ( c == EOF && ferror( yyin ) ) \
|
54
|
+
YY_FATAL_ERROR( "input in flex scanner failed" ); \
|
55
|
+
result = n; \
|
56
|
+
} \
|
57
|
+
else \
|
58
|
+
{ \
|
59
|
+
errno=0; \
|
60
|
+
while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \
|
61
|
+
{ \
|
62
|
+
if( errno != EINTR) \
|
63
|
+
{ \
|
64
|
+
YY_FATAL_ERROR( "input in flex scanner failed" ); \
|
65
|
+
break; \
|
66
|
+
} \
|
67
|
+
errno=0; \
|
68
|
+
clearerr(yyin); \
|
69
|
+
} \
|
70
|
+
}\
|
71
|
+
if( result == 0 && errno == 0 && was_fake_br == 0 ){\
|
72
|
+
strcpy(buf,"\n\xff"); result = 2; was_fake_br = 1; \
|
73
|
+
}
|
74
|
+
|
75
|
+
#endif // ifdef RUBY_VERSION
|
76
|
+
|
77
|
+
%}
|
78
|
+
|
79
|
+
%option stack
|
80
|
+
%x _PRE
|
81
|
+
%s _BOLD _ITALIC
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
%%
|
86
|
+
|
87
|
+
\xff ; /* EOF mark. should not appear in valid UTF8 text */
|
88
|
+
[\r\n\t ]+\xff ; /* EOF + skip tailing whitespace */
|
89
|
+
|
90
|
+
|
91
|
+
([ \t]*\n){2,} { return BRBR; }
|
92
|
+
([ \t]*\r){2,} { return BRBR; }
|
93
|
+
([ \t]*\r\n){2,} { return BRBR; }
|
94
|
+
|
95
|
+
[ \t]*(\r\n|\r|\n) { return BR; }
|
96
|
+
|
97
|
+
\<pre>[ \t]*<code>[\r\n]* { BEGIN _PRE; return PRE_START; }
|
98
|
+
|
99
|
+
^\*/[^ \t\r\n*.] { yylval.ivalue=0; yy_push_state(_BOLD); return BOLD_START; }
|
100
|
+
[ \t]+\*/[^ \t\r\n*.] { yylval.ivalue=1; yy_push_state(_BOLD); return BOLD_START; }
|
101
|
+
|
102
|
+
^_/[^ \t\r\n_] { yylval.ivalue=0; yy_push_state(_ITALIC); return ITALIC_START; }
|
103
|
+
[ \t]+_/[^ \t\r\n_] { yylval.ivalue=1; yy_push_state(_ITALIC); return ITALIC_START; }
|
104
|
+
|
105
|
+
^h[1-5]\.[ \t]+[^ \t\r\n][^\r\n]*/[\r\n\xff] {
|
106
|
+
yylval.svalue = yytext+4;
|
107
|
+
switch(yytext[1]){
|
108
|
+
case '1': return H1;
|
109
|
+
case '2': return H2;
|
110
|
+
case '3': return H3;
|
111
|
+
case '4': return H4;
|
112
|
+
case '5': return H5;
|
113
|
+
}
|
114
|
+
return H1;
|
115
|
+
}
|
116
|
+
|
117
|
+
^[ \t]*\*[ ]+ { return ULI; }
|
118
|
+
^[ \t]*#[ ]+ { return OLI; }
|
119
|
+
|
120
|
+
https?:\/\/[^ \r\n<>"(){}*]+[^ \r\n<>"(){}*,.\[\]] { yylval.svalue = yytext; return URL; }
|
121
|
+
|
122
|
+
#[0-9]+/[ \t\r\n,.;()] { yylval.svalue = yytext; return TICKET_LINK; }
|
123
|
+
\[\[ticket:[0-9]+(\|[^\[\]]+)?\]\] { yylval.svalue = yytext; return TICKET_LINK; }
|
124
|
+
|
125
|
+
\[\[revision:[0-9]+(\|[^\[\]]+)?\]\] { yylval.svalue = yytext+11; return SVN_REVISION_LINK; }
|
126
|
+
\[\[revision:[0-9a-f]+(\|[^\[\]]+)?\]\] { yylval.svalue = yytext+11; return GIT_REVISION_LINK; }
|
127
|
+
\[\[r:[0-9]+(\|[^\[\]]+)?\]\] { yylval.svalue = yytext+4; return SVN_REVISION_LINK; }
|
128
|
+
\[\[r:[0-9a-f]+(\|[^\[\]]+)?\]\] { yylval.svalue = yytext+4; return GIT_REVISION_LINK; }
|
129
|
+
|
130
|
+
\[\[url:[a-z]+:\/\/[^]]+\]\] { yylval.svalue = yytext+6; return URL_WITH_PROTO_LINK; }
|
131
|
+
\[\[url:\/[^]]+\]\] { yylval.svalue = yytext+6; return URL_WITH_PROTO_LINK; }
|
132
|
+
\[\[url:[^]]+\]\] { yylval.svalue = yytext+6; return URL_WITHOUT_PROTO_LINK; }
|
133
|
+
|
134
|
+
\[\[wiki:[^]]+\]\] { yylval.svalue = yytext+7; return WIKI_LINK; }
|
135
|
+
\[\[[a-zA-Z0-9_-][a-zA-Z0-9_#-]+(\|[^\[\]]+)?\]\] { yylval.svalue = yytext+2; return WIKI_LINK; }
|
136
|
+
|
137
|
+
\[\[#[^\r\n\[\]]+\]\] { yylval.svalue = yytext+3; return ANCHOR_LINK; }
|
138
|
+
|
139
|
+
^[ \t]+ ; /* skip spaces at line start */
|
140
|
+
[ \t]+ { yylval.ivalue = ' '; return T_CHAR; }
|
141
|
+
|
142
|
+
[a-zA-Z0-9]+ { yylval.svalue = yytext; return T_WORD; }
|
143
|
+
|
144
|
+
. { yylval.ivalue = yytext[0]; return T_CHAR; }
|
145
|
+
|
146
|
+
|
147
|
+
|
148
|
+
<_PRE>[ \t\r\n]*<\/code>[ \t]*<\/pre> { BEGIN INITIAL; return PRE_END; }
|
149
|
+
<_PRE>[ \t\r\n]*\xff { BEGIN INITIAL; return PRE_END; }
|
150
|
+
<_PRE>(?s:.) { yylval.ivalue = yytext[0]; return T_CHAR; }
|
151
|
+
<_PRE><<EOF>> { BEGIN INITIAL; return PRE_END; }
|
152
|
+
|
153
|
+
<_BOLD>[ \t]+\*/[ \t]+ { yylval.svalue = " *"; return T_WORD; } /* skip lone star */
|
154
|
+
<_BOLD>\*/[ \t\r\n'.,] { yy_pop_state(); return BOLD_END; }
|
155
|
+
<_BOLD><<EOF>> { yy_pop_state(); return BOLD_END; }
|
156
|
+
|
157
|
+
<_ITALIC>[ \t]+_/[ \t]+ { yylval.svalue = " _"; return T_WORD; } /* skip lone underscore */
|
158
|
+
<_ITALIC>_/[ \t\r\n'.,] { yy_pop_state(); return ITALIC_END; }
|
159
|
+
<_ITALIC><<EOF>> { yy_pop_state(); return ITALIC_END; }
|
160
|
+
|
161
|
+
%%
|
162
|
+
|
@@ -0,0 +1,98 @@
|
|
1
|
+
|
2
|
+
/* A Bison parser, made by GNU Bison 2.4.1. */
|
3
|
+
|
4
|
+
/* Skeleton interface for Bison's Yacc-like parsers in C
|
5
|
+
|
6
|
+
Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
|
7
|
+
Free Software Foundation, Inc.
|
8
|
+
|
9
|
+
This program is free software: you can redistribute it and/or modify
|
10
|
+
it under the terms of the GNU General Public License as published by
|
11
|
+
the Free Software Foundation, either version 3 of the License, or
|
12
|
+
(at your option) any later version.
|
13
|
+
|
14
|
+
This program is distributed in the hope that it will be useful,
|
15
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
GNU General Public License for more details.
|
18
|
+
|
19
|
+
You should have received a copy of the GNU General Public License
|
20
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
21
|
+
|
22
|
+
/* As a special exception, you may create a larger work that contains
|
23
|
+
part or all of the Bison parser skeleton and distribute that work
|
24
|
+
under terms of your choice, so long as that work isn't itself a
|
25
|
+
parser generator using the skeleton or a modified version thereof
|
26
|
+
as a parser skeleton. Alternatively, if you modify or redistribute
|
27
|
+
the parser skeleton itself, you may (at your option) remove this
|
28
|
+
special exception, which will cause the skeleton and the resulting
|
29
|
+
Bison output files to be licensed under the GNU General Public
|
30
|
+
License without this special exception.
|
31
|
+
|
32
|
+
This special exception was added by the Free Software Foundation in
|
33
|
+
version 2.2 of Bison. */
|
34
|
+
|
35
|
+
|
36
|
+
/* Tokens. */
|
37
|
+
#ifndef YYTOKENTYPE
|
38
|
+
# define YYTOKENTYPE
|
39
|
+
/* Put the tokens into the symbol table, so that GDB and other debuggers
|
40
|
+
know about them. */
|
41
|
+
enum yytokentype {
|
42
|
+
T_CHAR = 258,
|
43
|
+
BOLD_START = 259,
|
44
|
+
ITALIC_START = 260,
|
45
|
+
T_WORD = 261,
|
46
|
+
TICKET_LINK = 262,
|
47
|
+
LINK = 263,
|
48
|
+
SVN_REVISION_LINK = 264,
|
49
|
+
GIT_REVISION_LINK = 265,
|
50
|
+
WIKI_LINK = 266,
|
51
|
+
ANCHOR_LINK = 267,
|
52
|
+
URL_WITH_PROTO_LINK = 268,
|
53
|
+
URL_WITHOUT_PROTO_LINK = 269,
|
54
|
+
URL = 270,
|
55
|
+
UL = 271,
|
56
|
+
H1 = 272,
|
57
|
+
H2 = 273,
|
58
|
+
H3 = 274,
|
59
|
+
H4 = 275,
|
60
|
+
H5 = 276,
|
61
|
+
SPACE = 277,
|
62
|
+
BR = 278,
|
63
|
+
BRBR = 279,
|
64
|
+
OLI = 280,
|
65
|
+
ULI = 281,
|
66
|
+
PRE_START = 282,
|
67
|
+
PRE_END = 283,
|
68
|
+
BOLD_END = 284,
|
69
|
+
ITALIC_END = 285
|
70
|
+
};
|
71
|
+
#endif
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
|
76
|
+
typedef union YYSTYPE
|
77
|
+
{
|
78
|
+
|
79
|
+
/* Line 1676 of yacc.c */
|
80
|
+
#line 59 "parser.y"
|
81
|
+
|
82
|
+
double dvalue;
|
83
|
+
int ivalue;
|
84
|
+
char const* svalue;
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
/* Line 1676 of yacc.c */
|
89
|
+
#line 90 "parser.tab.h"
|
90
|
+
} YYSTYPE;
|
91
|
+
# define YYSTYPE_IS_TRIVIAL 1
|
92
|
+
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
|
93
|
+
# define YYSTYPE_IS_DECLARED 1
|
94
|
+
#endif
|
95
|
+
|
96
|
+
extern YYSTYPE yylval;
|
97
|
+
|
98
|
+
|
Binary file
|
@@ -0,0 +1,357 @@
|
|
1
|
+
%{
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <string.h>
|
5
|
+
|
6
|
+
#ifdef RUBY_VERSION
#include "ruby.h"
#else
/* Stand-alone replacement for Ruby's REALLOC_N: resize `ptr` to hold `n`
   elements of `type`. Arguments are parenthesized so expressions are safe. */
#define REALLOC_N(ptr,type,n) ((ptr) = realloc((ptr), sizeof(type) * (n)))
#endif

extern int yylex();

/* Output helpers defined further down in this file. They are called from
   grammar actions before their definitions, so declare them up front. */
int concat(const char *what, size_t len);
int concat2(const char *what);
int concat_raw_char(int c);
int concat_escaped_char(int c);
int concat_hex_char(char c);
int need_hex_convert(const char *p, const char *pend);
int process_header(const char *title);
int process_link_tail(const char *text, const char *pend, const char *prepend);
int process_anchor_link(const char *target);
int process_url_link(const char *target, const char *proto);
int process_svn_link(const char *target);
int process_git_link(const char *target);
int process_wiki_link(const char *target);
int process_ticket_link(const char *ticket_id);
int process_url(const char *url);

/* Scanner input: start, current read position and total length. */
char *in_buf = NULL, *in_pos = NULL;
size_t in_buf_len = 0;

/* Generated output: start of buffer, write position and allocated size. */
char *buf = NULL, *bufptr = NULL;
size_t bufsize = 0;
char *space_name = "test_space";

/*
 * Make sure `len` more bytes plus one spare byte fit after bufptr, growing
 * the buffer by at least 0x1000 bytes when they do not.
 *
 * - wrapped in do/while(0) so it behaves as one statement (safe in if/else);
 * - `len` is evaluated exactly once;
 * - the write offset is saved before reallocating, so the old (possibly
 *   freed) pointer is never used afterwards;
 * - aborts with a message if the reallocation yields NULL.
 */
#define CHECK_BUF_SIZE(len) \
    do { \
        size_t need_ = (size_t)(len); \
        size_t used_ = (size_t)(bufptr - buf); \
        if( used_ + need_ + 1 >= bufsize ){ \
            bufsize += (need_ > 0x1000) ? (need_ + 0x1000) : 0x1000; \
            REALLOC_N(buf, char, bufsize); \
            if( !buf ){ \
                fprintf(stderr, "ERROR(PARSER): out of memory\n"); \
                abort(); \
            } \
            bufptr = buf + used_; \
        } \
    } while(0)
|
29
|
+
|
30
|
+
concat(const char*what, size_t len){
|
31
|
+
// printf("[.] concat: w=\"%s\", l=%d\n",what,len);
|
32
|
+
CHECK_BUF_SIZE(len);
|
33
|
+
memcpy(bufptr, what, len);
|
34
|
+
bufptr += len;
|
35
|
+
// printf("[.] concat OK\n");
|
36
|
+
}
|
37
|
+
|
38
|
+
// it's better to use concat(), especially if "what"'s length is always predefined
|
39
|
+
/*
 * Append the NUL-terminated string `what` (without its terminator) to the
 * output buffer. Prefer concat() when the length is already known.
 * Always returns 0.
 */
int concat2(const char*what){
    concat(what, strlen(what));
    return 0;
}
|
43
|
+
|
44
|
+
// better error reporting
|
45
|
+
#define YYERROR_VERBOSE
|
46
|
+
|
47
|
+
// more stacks
|
48
|
+
#define YYMAXDEPTH 0x180000
|
49
|
+
|
50
|
+
// bison requires that you supply this function
|
51
|
+
/* Error callback required by bison: prints the parser's message on stdout
   with an "ERROR(PARSER): " prefix and a trailing newline. */
void yyerror(const char *msg)
{
    fprintf(stdout, "ERROR(PARSER): %s\n", msg);
}
|
55
|
+
|
56
|
+
|
57
|
+
%}
|
58
|
+
|
59
|
+
%union {
|
60
|
+
double dvalue;
|
61
|
+
int ivalue;
|
62
|
+
char const* svalue;
|
63
|
+
}
|
64
|
+
|
65
|
+
|
66
|
+
%token <ivalue> T_CHAR BOLD_START ITALIC_START
|
67
|
+
%token <svalue> T_WORD TICKET_LINK LINK SVN_REVISION_LINK GIT_REVISION_LINK WIKI_LINK ANCHOR_LINK
|
68
|
+
%token <svalue> URL_WITH_PROTO_LINK URL_WITHOUT_PROTO_LINK
|
69
|
+
%token <svalue> URL
|
70
|
+
%token <svalue> UL
|
71
|
+
%token <svalue> H1 H2 H3 H4 H5
|
72
|
+
%token SPACE BR BRBR OLI ULI
|
73
|
+
%token PRE_START PRE_END
|
74
|
+
%token BOLD_END ITALIC_END
|
75
|
+
|
76
|
+
//%type <dvalue> expression
|
77
|
+
//%type <dvalue> term
|
78
|
+
//%type <dvalue> varornum
|
79
|
+
%%
|
80
|
+
text :
|
81
|
+
| textitem text
|
82
|
+
|
83
|
+
|
84
|
+
textitem: br
|
85
|
+
| words
|
86
|
+
| h1 {concat("</h1>",5)}
|
87
|
+
| h2 {concat("</h2>",5)}
|
88
|
+
| h3 {concat("</h3>",5)}
|
89
|
+
| h4 {concat("</h4>",5)}
|
90
|
+
| h5 {concat("</h5>",5)}
|
91
|
+
| {concat("<ul>",4)} ulist {concat("</ul>",5)} textitem
|
92
|
+
| {concat("<ol>",4)} olist {concat("</ol>",5)} textitem
|
93
|
+
| pre
|
94
|
+
|
95
|
+
ulist: ulitem {concat("</li>",5)}
|
96
|
+
| ulist ulitem {concat("</li>",5)}
|
97
|
+
|
98
|
+
ulitem: uli words
|
99
|
+
| uli words BR
|
100
|
+
|
101
|
+
olist: olitem {concat("</li>",5)}
|
102
|
+
| olist olitem {concat("</li>",5)}
|
103
|
+
|
104
|
+
olitem: oli words
|
105
|
+
| oli words BR
|
106
|
+
|
107
|
+
words: word
|
108
|
+
| word words
|
109
|
+
|
110
|
+
word : chars
|
111
|
+
| link
|
112
|
+
| T_WORD {concat2($1)} // TODO: somehow pass T_WORD's length here
|
113
|
+
| URL {process_url($1)}
|
114
|
+
| BOLD_START {$1 ? concat(" <strong>",9) : concat("<strong>",8)}
|
115
|
+
| BOLD_END {concat("</strong>",9)}
|
116
|
+
| ITALIC_START {$1 ? concat(" <em>",5) : concat("<em>",4)}
|
117
|
+
| ITALIC_END {concat("</em>",5)}
|
118
|
+
|
119
|
+
link: TICKET_LINK {process_ticket_link($1)}
|
120
|
+
| SVN_REVISION_LINK {process_svn_link($1)}
|
121
|
+
| GIT_REVISION_LINK {process_git_link($1)}
|
122
|
+
| URL_WITH_PROTO_LINK {process_url_link($1,NULL)}
|
123
|
+
| URL_WITHOUT_PROTO_LINK {process_url_link($1,"http://")}
|
124
|
+
| WIKI_LINK {process_wiki_link($1)}
|
125
|
+
| ANCHOR_LINK {process_anchor_link($1)}
|
126
|
+
|
127
|
+
chars:
|
128
|
+
| char chars
|
129
|
+
|
130
|
+
char : T_CHAR {concat_escaped_char($1)}
|
131
|
+
|
132
|
+
//raw_chars:
|
133
|
+
// | raw_char raw_chars
|
134
|
+
|
135
|
+
//raw_char : T_CHAR {concat_raw_char($1)}
|
136
|
+
|
137
|
+
h1 : H1 {concat("<h1 id=\"",8); process_header($1)}
|
138
|
+
h2 : H2 {concat("<h2 id=\"",8); process_header($1)}
|
139
|
+
h3 : H3 {concat("<h3 id=\"",8); process_header($1)}
|
140
|
+
h4 : H4 {concat("<h4 id=\"",8); process_header($1)}
|
141
|
+
h5 : H5 {concat("<h5 id=\"",8); process_header($1)}
|
142
|
+
ul : UL {concat("<ul>",4)}
|
143
|
+
oli : OLI {concat("<li>",4)}
|
144
|
+
uli : ULI {concat("<li>",4)}
|
145
|
+
br : BR {concat("<br />",6)}
|
146
|
+
| BRBR {concat("<br /><br />",12)}
|
147
|
+
pre : PRE_START {concat("<pre><code>",11)} chars PRE_END {concat("</code></pre>",13)}
|
148
|
+
|
149
|
+
//word : T_WORD { process_word($1); }
|
150
|
+
|
151
|
+
%%
|
152
|
+
|
153
|
+
/*
 * Append byte `c` to the output as two lowercase hexadecimal digits,
 * high nibble first. Always returns 0.
 */
int concat_hex_char(char c){
    static const char digits[] = "0123456789abcdef";
    // go through unsigned char so bytes >= 0x80 do not sign-extend
    unsigned char byte = (unsigned char)c;

    concat_raw_char(digits[byte >> 4]);
    concat_raw_char(digits[byte & 0x0f]);
    return 0;
}
|
160
|
+
|
161
|
+
/*
 * Tell whether the text from `p` up to and including `pend` contains a
 * character outside the set: space, '_', '-', '.', 0-9, a-z, A-Z.
 * Scanning also stops at a NUL byte.
 *
 * Returns 1 if such a character is found, 0 otherwise.
 */
int need_hex_convert(const char*p, const char*pend){
    for(; *p && p<=pend; p++){
        const char c = *p;
        const int allowed =
            c == ' ' || c == '_' || c == '-' || c == '.' ||
            (c >= '0' && c <= '9') ||
            (c >= 'a' && c <= 'z') ||
            (c >= 'A' && c <= 'Z');

        if( !allowed ) return 1;
    }
    return 0;
}
|
180
|
+
|
181
|
+
/*
 * Finish a heading whose opening `<hN id="` has already been emitted:
 * writes the id value, closes the tag with `">` and writes the escaped
 * title text. The closing </hN> is emitted by the caller.
 *
 * `title` is the heading text; leading and trailing spaces/tabs are ignored.
 * The id is the title with spaces turned into '_' when need_hex_convert()
 * accepts it, otherwise every byte is written as two hex digits.
 * Always returns 0.
 */
int process_header(const char*title){
    size_t len, i;

    // skip heading spaces
    while(*title == ' ' || *title == '\t') title++;

    // skip tailing spaces; a length is used so an empty title never forms
    // a pointer before the start of the string
    len = strlen(title);
    while(len > 0 && (title[len-1] == ' ' || title[len-1] == '\t')) len--;

    // concat 'id'
    if( len > 0 && need_hex_convert(title, title + len - 1) ){
        for(i = 0; i < len; i++) concat_hex_char( title[i] );
    } else {
        for(i = 0; i < len; i++) concat_raw_char( title[i] == ' ' ? '_' : title[i] );
    }
    concat("\">",2);
    for(i = 0; i < len; i++) concat_escaped_char( title[i] );
    return 0;
}
|
201
|
+
|
202
|
+
/*
 * Close an already opened `<a href="...` and emit the visible link text
 * followed by `</a>`.
 *
 * text    - link body, e.g. `http://www.ru|TITLE]]`
 * pend    - last character of the body (inclusive); when NULL it is the
 *           character before the first ']' or, failing that, the last
 *           character of `text`
 * prepend - optional prefix written before the text when the link has no
 *           explicit title; may be NULL
 *
 * If the body contains '|' (within the body, not beyond `pend`), the text
 * after it is the title; otherwise the body itself is shown. The visible
 * text is HTML-escaped. Always returns 0.
 */
int process_link_tail(const char*text,const char*pend,const char*prepend){
    const char *p;
    const char *bar;

    concat("\">",2);

    if(!pend){
        pend = strchr(text,']');
        pend = pend ? (pend-1) : (text + strlen(text) - 1);
    }

    // find start of TITLE: [[url:http://www.ru|TITLE]]
    bar = strchr(text,'|');
    if(bar && bar <= pend){
        // title found
        p = bar + 1;
    } else {
        // no title
        if(prepend) concat2(prepend);
        p = text;
    }

    for(; *p && p<=pend; p++) concat_escaped_char( *p );
    concat("</a>",4);
    return 0;
}
|
225
|
+
|
226
|
+
/*
 * Emit an in-page anchor link for `[[#target]]` or `[[#target|title]]`.
 * `target` points just past the `[[#` and still carries the closing `]]`.
 *
 * The href fragment is the target with spaces turned into '_' when
 * need_hex_convert() accepts it, otherwise hex-encoded. The title attribute
 * holds the target text and is HTML-escaped, because the target may contain
 * '"', '<', '>' or '&'. Always returns 0.
 */
int process_anchor_link(const char*target){
    const char *p,*pend;

    // skip tail: closing brackets and trailing blanks
    p = target;
    pend = p+strlen(p)-1;
    while(pend > p && (*pend == ' ' || *pend == '\t' || *pend == ']')) pend--;

    // the target proper ends before an optional "|title"
    p = strchr(target,'|');
    if(p && (p<pend) && (p>target)) pend = p-1;

    concat("<a href=\"#",10);
    if( need_hex_convert(target,pend) ){
        for(p = target; *p && p<=pend; p++) concat_hex_char( *p );
    } else {
        for(p = target; *p && p<=pend; p++) concat_raw_char( *p == ' ' ? '_' : *p);
    }
    concat("\" title=\"#",10);
    // escaped: raw output here would let a '"' in the target end the attribute
    for(p = target; *p && p<=pend; p++) concat_escaped_char( *p );
    concat("\" class=\"wiki_link",18);
    process_link_tail(target,NULL,"#");
    return 0;
}
|
247
|
+
|
248
|
+
process_url_link(const char*target,const char* proto){
|
249
|
+
const char *c;
|
250
|
+
concat("<a href=\"",9);
|
251
|
+
if(proto) concat2(proto);
|
252
|
+
for(c=target; *c && *c != ']' && *c != '|'; c++) concat_raw_char(*c);
|
253
|
+
process_link_tail(target,NULL,proto);
|
254
|
+
}
|
255
|
+
|
256
|
+
process_svn_link(const char*target){
|
257
|
+
const char *c;
|
258
|
+
concat("<a href=\"http://code.assembla.com/",34);
|
259
|
+
concat2(space_name);
|
260
|
+
concat("/svn/changesets/",16);
|
261
|
+
for(c=target; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
262
|
+
process_link_tail(target,NULL,"revision:");
|
263
|
+
}
|
264
|
+
|
265
|
+
process_git_link(const char*target){
|
266
|
+
const char *c;
|
267
|
+
concat("<a href=\"http://code.assembla.com/",34);
|
268
|
+
concat2(space_name);
|
269
|
+
concat("/git/changesets/",16);
|
270
|
+
for(c=target; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
271
|
+
process_link_tail(target,NULL,"revision:");
|
272
|
+
}
|
273
|
+
|
274
|
+
process_wiki_link(const char*target){
|
275
|
+
const char *c;
|
276
|
+
concat("<a class=\"wiki_link\" title=\"",28);
|
277
|
+
for(c=target; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
278
|
+
concat("\" href=\"/wiki/show/",19);
|
279
|
+
concat2(space_name);
|
280
|
+
concat_raw_char('/');
|
281
|
+
for(c=target; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
282
|
+
process_link_tail(target,NULL,NULL);
|
283
|
+
}
|
284
|
+
|
285
|
+
process_ticket_link(const char*ticket_id){
|
286
|
+
const char *c;
|
287
|
+
while(*ticket_id && (*ticket_id < '0' || *ticket_id > '9') ) ticket_id++;
|
288
|
+
concat("<a href=\"/spaces/",17);
|
289
|
+
concat2(space_name);
|
290
|
+
concat("/tickets/",9);
|
291
|
+
for(c=ticket_id; *c && *c>='0' && *c<='9'; c++) concat_raw_char(*c);
|
292
|
+
process_link_tail(ticket_id,NULL,"#");
|
293
|
+
}
|
294
|
+
|
295
|
+
/*
 * Append character `c` to the output, replacing the HTML-special characters
 * '<', '>', '"' and '&' with their entities. Any other value is appended
 * unchanged. Always returns 0.
 */
int concat_escaped_char(int c){
    switch(c){
        case '<':
            concat("&lt;",4);
            break;
        case '>':
            concat("&gt;",4);
            break;
        case '"':
            concat("&quot;",6);
            break;
        case '&':
            concat("&amp;",5);
            break;
        default:
            concat_raw_char(c);
            break;
    }
    return 0;
}
|
314
|
+
|
315
|
+
|
316
|
+
concat_raw_char(int c){
|
317
|
+
CHECK_BUF_SIZE(1);
|
318
|
+
*bufptr++ = c;
|
319
|
+
}
|
320
|
+
|
321
|
+
/*
|
322
|
+
concat_escaping_html(const char*what){
|
323
|
+
char *po = buf + strlen(buf);
|
324
|
+
const char *pi = what;
|
325
|
+
|
326
|
+
for(;*pi;pi++){
|
327
|
+
switch(*pi){
|
328
|
+
case '<':
|
329
|
+
strcpy(po,"<");
|
330
|
+
po += 4;
|
331
|
+
break;
|
332
|
+
case '>':
|
333
|
+
strcpy(po,">");
|
334
|
+
po += 4;
|
335
|
+
break;
|
336
|
+
case '"':
|
337
|
+
strcpy(po,""");
|
338
|
+
po += 6;
|
339
|
+
break;
|
340
|
+
case '&':
|
341
|
+
strcpy(po,"&");
|
342
|
+
po += 5;
|
343
|
+
break;
|
344
|
+
default:
|
345
|
+
*po++ = *pi;
|
346
|
+
}
|
347
|
+
}
|
348
|
+
*po = 0;
|
349
|
+
}*/
|
350
|
+
|
351
|
+
/*
 * Emit `<a href="URL">URL</a>` for a bare URL found in the text.
 *
 * The href receives the URL bytes unchanged; the visible text is
 * HTML-escaped. The text is written here rather than through
 * process_link_tail(): a bare URL has no `|title` part or closing `]]`,
 * so a ']' or '|' inside it must not cut or replace the link text.
 * Always returns 0.
 */
int process_url(const char*url){
    const char *p;

    concat("<a href=\"",9);
    for(p=url; *p; p++) concat_raw_char(*p);
    concat("\">",2);
    for(p=url; *p; p++) concat_escaped_char(*p);
    concat("</a>",4);
    return 0;
}
|