bbcodelib 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +6 -0
- data/Manifest.txt +21 -0
- data/README.rdoc +39 -0
- data/Rakefile +34 -0
- data/bbcodelib.gemspec +34 -0
- data/ext/Makefile +150 -0
- data/ext/bbcode.cpp +37 -0
- data/ext/bbcode_config.h +179 -0
- data/ext/bbcode_lexer.cpp +297 -0
- data/ext/bbcode_lexer.h +99 -0
- data/ext/bbcode_parser.cpp +851 -0
- data/ext/bbcode_parser.h +159 -0
- data/ext/bbcode_utils.cpp +143 -0
- data/ext/bbcode_utils.h +144 -0
- data/ext/extconf.rb +13 -0
- data/lib/bbcodelib.rb +6 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_bbcodelib.rb +11 -0
- data/test/test_helper.rb +3 -0
- metadata +89 -0
@@ -0,0 +1,297 @@
|
|
1
|
+
#include "bbcode_lexer.h"
|
2
|
+
|
3
|
+
namespace bbcode{
|
4
|
+
// Transition table of the lexer's state machine.
// A row is the current state (C1..C16). get_lexeme() reads the columns as:
//    0  any character not listed below
//    1  ENTER                 2  CR_RETURN          3  COLON
//    4  SEMICOLON (its case in get_lexeme() is commented out)
//    5  QUOTE                 6  SPACEBAR
//    7  not read by get_lexeme()
//    8  flag: T = terminal state, NT = not terminal
//    9  flag: YES = the last character is pushed back, NO = it is consumed
//   10  EQUALITY             11  LEFT_SQUARE_BRACKET
//   12  RIGHT_SQUARE_BRACKET 13  SLASH             14  MULT
// Each row lists 15 of the 17 declared columns; the two remaining entries are
// zero-initialised, i.e. they hold C1.
enum conds Lexer[][17]={/*C1*/  {C2,  C4,  C1,  C6,  C7,  C8,  C10, C1,  NT, NO,  C12, C13, C14, C15, C16 }, // initial state
                        /*C2*/  {C2,  C3,  C3,  C3,  C3,  C3,  C3,  C1,  NT, NO,  C3,  C3,  C3,  C3,  C3  }, // collecting the characters of a WORD
                        /*C3*/  {C1,  C1,  C1,  C1,  C1,  C1,  C1,  C1,  T,  YES, C1,  C1,  C1,  C1,  C1  }, // we found WORD, pushback
                        /*C4*/  {C2,  C1,  C1,  C6,  C7,  C8,  C1,  C1,  NT, NO,  C12, C1,  C1,  C1,  C1  }, // entered from C1 on ENTER
                        /*C5*/  {C1,  C1,  C1,  C1,  C1,  C1,  C1,  C1,  T,  NO,  C1,  C1,  C1,  C1,  C1  }, // EMPTY
                        /*C6*/  {C1,  C1,  C1,  C1,  C1,  C1,  C1,  C1,  T,  NO,  C1,  C1,  C1,  C1,  C1  }, // we found COLON
                        /*C7*/  {C1,  C1,  C1,  C1,  C1,  C1,  C1,  C1,  T,  NO,  C1,  C1,  C1,  C1,  C1  }, // we found SEMICOLON
                        /*C8*/  {C1,  C1,  C1,  C1,  C1,  C1,  C1,  C1,  T,  NO,  C1,  C1,  C1,  C1,  C1  }, // we found QUOTE
                        /*C9*/  {C1,  C1,  C1,  C1,  C1,  C1,  C1,  C1,  T,  NO,  C1,  C1,  C1,  C1,  C1  }, // NOT USED
                        // NOTE(review): in row C10 every non-space column leads to C11 except
                        // column 14 (MULT), which leads to C1 - confirm this is intended.
                        /*C10*/ {C11, C11, C11, C11, C11, C11, C10, C11, NT, NO,  C11, C11, C11, C11, C1  }, // collecting a run of spaces
                        /*C11*/ {C1,  C1,  C1,  C1,  C1,  C1,  C1,  C1,  T,  YES, C1,  C1,  C1,  C1,  C11 }, // we found SPACE, pushback
                        /*C12*/ {C1,  C1,  C1,  C1,  C1,  C1,  C1,  C1,  T,  NO,  C1,  C1,  C1,  C1,  C1  }, // we found EQUALITY
                        /*C13*/ {C1,  C1,  C1,  C1,  C1,  C1,  C1,  C1,  T,  NO,  C1,  C1,  C1,  C1,  C1  }, // we found LEFT_SQUARE_BRACKET
                        /*C14*/ {C1,  C1,  C1,  C1,  C1,  C1,  C1,  C1,  T,  NO,  C1,  C1,  C1,  C1,  C1  }, // we found RIGHT_SQUARE_BRACKET
                        /*C15*/ {C1,  C1,  C1,  C1,  C1,  C1,  C1,  C1,  T,  NO,  C1,  C1,  C1,  C1,  C1  }, // we found SLASH
                        /*C16*/ {C1,  C1,  C1,  C1,  C1,  C1,  C1,  C1,  T,  NO,  C1,  C1,  C1,  C1,  C1  }  // we found MULT
};
|
23
|
+
//--------------------------------
|
24
|
+
// lexer::get_lexeme_str, return current lexeme
|
25
|
+
string lexer::get_lexeme_str(){
    // _strbuf_copy holds the text captured when get_lexeme() last returned a
    // lexeme; the caller receives its own copy of it.
    return string(_strbuf_copy);
}
|
28
|
+
//--------------------------------
|
29
|
+
// lexer::init, perform initialization
|
30
|
+
void lexer::init(const parser_config& pconf){
    // keep a private copy of the configuration; the read buffer size and the
    // source stream are taken from it
    _pconf = pconf;

    // make sure no buffer from an earlier init() survives: delete[] accepts
    // NULL, and the pointer is cleared before the new allocation is attempted
    delete [] _buffer;
    _buffer = NULL;
    _buffer = new char[_pconf._read_buffer_size];

    // start the state machine in its initial state with empty scratch strings
    _current_state = C1;
    _upper_str.clear();
    _strbuf_copy.clear();
    _strbuf.clear();

    // nothing has been read yet; _i == _read_chars_num makes the first
    // get_lexeme() call fetch data from the stream
    _tot_characters_read = 0;
    _read_chars_num = 0;
    _i = 0;
}
|
47
|
+
|
48
|
+
//--------------------------------
|
49
|
+
// lexer::get_lexeme
|
50
|
+
|
51
|
+
// Maps the upper-cased text of a WORD onto the matching BBCode keyword lexeme.
//   upper    - upper-cased word text
//   word_len - length of the word, used to pick the group of candidates
// Returns WORD when the text is not one of the known keywords.
static lexeme keyword_lexeme(const string& upper, size_t word_len){
    switch(word_len){
        case 1:
            if(upper == _BOLD)      return BOLD;
            if(upper == _ITALIC)    return ITALIC;
            if(upper == _UNDERLINE) return UNDERLINE;
            if(upper == _HEADER)    return HEADER;
            if(upper == _SMALL)     return SMALL;
            if(upper == _OFFTOPIC)  return OFFTOPIC;
            if(upper == _Q)         return QQUOTE;
            if(upper == _MULT)      return MULT;
            break;
        case 2:
            if(upper == _HR)        return HR;
            break;
        case 3:
            if(upper == _SUP)       return SUP;
            if(upper == _SUB)       return SUB;
            if(upper == _RED)       return RED;
            if(upper == _URL)       return URL;
            if(upper == _IMG)       return IMG;
            if(upper == _PRE)       return PRE;
            break;
        case 4:
            if(upper == _SIZE)      return SIZE;
            if(upper == _FACE)      return FACE;
            if(upper == _LIST)      return LIST;
            if(upper == _CODE)      return CODE;
            if(upper == _QUOTE)     return QQUOTE;
            if(upper == _BLUE)      return BLUE;
            break;
        case 5:
            if(upper == _EMAIL)     return EMAIL;
            if(upper == _QUOTE)     return QQUOTE;
            if(upper == _COLOR)     return COLOR;
            if(upper == _GREEN)     return GREEN;
            if(upper == _WHITE)     return WHITE;
            if(upper == _BLACK)     return BLACK;
            if(upper == _OLIST)     return OLIST;
            break;  /* was missing: five-letter words fell through into the six-letter group */
        case 6:
            if(upper == _ORANGE)    return ORANGE;
            if(upper == _PURPLE)    return PURPLE;
            if(upper == _YELLOW)    return YELLOW;
            break;
        default:
            break;
    }
    return WORD;
}

// lexer::get_lexeme
// Returns the next lexeme from the configured source stream; its text is
// available through get_lexeme_str() afterwards.
// The stream is read in chunks of _pconf._read_buffer_size characters, and every
// character drives the Lexer transition table until a terminal state is reached.
// Returns TERM when the stream yields no more characters. At that point only a
// pending WORD (state C2) is flushed first, and it is returned as plain WORD
// without the keyword lookup; text collected in any other state is not returned.
// NOTE(review): confirm that dropping such trailing text (e.g. a run of spaces
// at the very end of the input) is intended.
lexeme lexer::get_lexeme(void){

    do{ /* we rotate here until the source stream is exhausted */

        if(_i == _read_chars_num){ /* buffer fully consumed, fetch the next chunk */
            _pconf._source_stream->read(_buffer, static_cast<streamsize>(_pconf._read_buffer_size));
            _read_chars_num = static_cast<size_t>(_pconf._source_stream->gcount());
            if(!_read_chars_num){

                if(_current_state == C2){
                    /* the input ended in the middle of a word: hand it out and
                       get ready to report TERM on the next call */
                    _strbuf_copy = _strbuf;
                    _strbuf.clear();
                    _current_state = C1;
                    _i = 0;
                    return WORD;
                }

                return TERM; /* end of stream reached */
            }
            //if(_tot_characters_read + _read_chars_num > _pconf._max_content_length) /* we have reached max_content_length limitation */
            //    throw out_of_range(get_module_msg("max content length reached (") + stream_cast<string>(_pconf._max_content_length) + ")");
            _tot_characters_read += _read_chars_num; /* increment read character counter */
            _i = 0;
        }

        for(; _i < _read_chars_num; _i++){

            const char ch = _buffer[_i];
            _strbuf.append(1, ch);

            /* pick the transition table column for this character */
            size_t column;
            switch(ch){
                case ENTER :               column = 1;  break;
                case CR_RETURN :           column = 2;  break;
                case COLON :               column = 3;  break;
                //case SEMICOLON :         column = 4;  break;
                case QUOTE :               column = 5;  break;
                case SPACEBAR :            column = 6;  break;
                case EQUALITY :            column = 10; break;
                case LEFT_SQUARE_BRACKET:  column = 11; break;
                case RIGHT_SQUARE_BRACKET: column = 12; break;
                case SLASH:                column = 13; break;
                case MULT:                 column = 14; break;
                default:                   column = 0;  break;
            }
            _current_state = Lexer[_current_state][column];

            /* now, check out if we're in a terminal state */
            if(Lexer[_current_state][8] == T){
                if(Lexer[_current_state][9] == YES){
                    /* push back one symbol: drop it from the lexeme text and leave
                       _i untouched, so the same character is examined again on the
                       next call */
                    if(!_strbuf.empty())
                        _strbuf.erase(_strbuf.length()-1);
                }else if(Lexer[_current_state][9] == NO){
                    /* the character belongs to the lexeme: step past it */
                    ++_i;
                }
            }

            /* translate the state into the lexeme it stands for */
            lexeme found = WORD;
            switch(_current_state){
                case C3:  found = WORD;                 break;
                case C6:  found = COLON;                break;
                case C7:  found = SEMICOLON;            break;
                case C8:  found = QUOTE;                break;
                case C11: found = SPACE;                break;
                case C12: found = EQUALITY;             break;
                case C13: found = LEFT_SQUARE_BRACKET;  break;
                case C14: found = RIGHT_SQUARE_BRACKET; break;
                case C15: found = SLASH;                break;
                case C16: found = MULT;                 break;
                default:
                    continue; /* no lexeme yet, go on with the next character */
            }

            /* publish the lexeme text and rearm the machine */
            const bool is_word = (_current_state == C3);
            _strbuf_copy = _strbuf;
            _strbuf.clear();
            _current_state = C1;

            if(!is_word)
                return found;

            // Recognising keywords is not really the lexer's job,
            // but doing it here keeps the parser simpler.
            const size_t word_len = _strbuf_copy.length();
            if(word_len < 1 || word_len > 6)
                return WORD; /* no keyword has this length */
            _upper_str = upper_case(_strbuf_copy);
            return keyword_lexeme(_upper_str, word_len);
        }

    }while(_read_chars_num <= _pconf._read_buffer_size);

    return TERM;
}
|
296
|
+
|
297
|
+
}
|
data/ext/bbcode_lexer.h
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
/*!
|
2
|
+
Abstract:
|
3
|
+
this file is a part of bbcode library
|
4
|
+
|
5
|
+
Author:
|
6
|
+
Igor Franchuk (sprog@online.ru)
|
7
|
+
|
8
|
+
Last Update:
|
9
|
+
$Id: bbcode_lexer.h,v 1.1 2007/12/19 19:13:30 lanthruster Exp $
|
10
|
+
Version: 0.01
|
11
|
+
*/
|
12
|
+
|
13
|
+
#ifndef BBCODE_LEXER_H
|
14
|
+
#define BBCODE_LEXER_H
|
15
|
+
#pragma once
|
16
|
+
|
17
|
+
#include <string>
|
18
|
+
#include <iostream>
|
19
|
+
#include <istream>
|
20
|
+
#include <stdexcept>
|
21
|
+
|
22
|
+
#include "bbcode_config.h"
|
23
|
+
#include "bbcode_utils.h"
|
24
|
+
|
25
|
+
#pragma warning(disable: 4290) //VS doesn't support exception specifications
|
26
|
+
|
27
|
+
// Standard-library names that the bbcode code uses unqualified (for example
// `string` and `streamsize`).
// NOTE(review): an unnamed namespace in a header makes these names visible
// unqualified in every translation unit that includes it. Removing it would
// break the unqualified uses, so it is kept as is.
namespace{

    using std::string;
    using std::istream;
    using std::cin;
    using std::out_of_range;
    using std::length_error;
    using std::streamsize;

}
|
37
|
+
|
38
|
+
namespace bbcode{
|
39
|
+
|
40
|
+
//symbols
|
41
|
+
// T is TERMINATOR, NT = NOT TERMINATOR
|
42
|
+
// CHAR = anything but (COLON, SEMICOLON, QUOTE, ENTER, CR_RETURN, SPACE, EQUALITY)
|
43
|
+
// enum symbols{CHAR, COLON = ':', SEMICOLON = ';', QUOTE = '"', ENTER = '\r', SPACEBAR = ' ', CR_RETURN = '\n'};
|
44
|
+
|
45
|
+
//EM states
|
46
|
+
// States of the lexer's state machine plus the flag values stored in the
// transition table. C1 has the value 0 and the states count upwards from it.
// There is no C128 enumerator: C127 is followed directly by C129.
// The flags reuse state values: NT and NO equal C1 (0), T and YES equal C2 (1),
// PB equals C3 (2). They can share one table with the states because the table
// is an array of conds.
enum conds{C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11, C12, C13, C14, C15, C16, C17, C18, C19,
    C20, C21, C22, C23, C24, C25, C26, C27, C28, C29, C30,
    C31, C32, C33, C34, C35, C36, C37, C38, C39, C40, C41,
    C42, C43, C44, C45, C46, C47, C48, C49, C50, C51, C52,
    C53, C54, C55, C56, C57, C58, C59, C60, C61, C62, C63,
    C64, C65, C66, C67, C68, C69, C70, C71, C72, C73, C74,
    C75, C76, C77, C78, C79, C80, C81, C82, C83, C84, C85,
    C86, C87, C88, C89, C90, C91, C92, C93, C94, C95, C96,
    C97, C98, C99,C100,C101,C102,C103,C104,C105,C106,C107,
    C108,C109,C110,C111,C112,C113,C114,C115,C116,C117,C118,
    C119,C120,C121,C122,C123,C124,C125,C126,C127,C129,C130,
    END, T=1, NT=0, PB=2, YES=1, NO=0};
|
58
|
+
|
59
|
+
|
60
|
+
class lexer{
|
61
|
+
public:
|
62
|
+
enum flag{ON=1, OFF=0};
|
63
|
+
private:
|
64
|
+
parser_config _pconf;
|
65
|
+
char* _buffer;
|
66
|
+
size_t _tot_characters_read;
|
67
|
+
conds _current_state;
|
68
|
+
string _strbuf;
|
69
|
+
string _strbuf_copy;
|
70
|
+
|
71
|
+
string _upper_str;
|
72
|
+
|
73
|
+
size_t _i;
|
74
|
+
size_t _read_chars_num;
|
75
|
+
|
76
|
+
public:
|
77
|
+
|
78
|
+
|
79
|
+
|
80
|
+
/* class initializer */
|
81
|
+
void init(const parser_config& pconf);
|
82
|
+
|
83
|
+
lexeme get_lexeme(void);
|
84
|
+
string get_lexeme_str(void);
|
85
|
+
|
86
|
+
/* constructor section */
|
87
|
+
lexer() :
|
88
|
+
_tot_characters_read(0),
|
89
|
+
_buffer(NULL)
|
90
|
+
{}
|
91
|
+
|
92
|
+
~lexer() { if(_buffer != NULL){ delete[] _buffer; _buffer = NULL; } }
|
93
|
+
|
94
|
+
};
|
95
|
+
|
96
|
+
|
97
|
+
}
|
98
|
+
|
99
|
+
#endif
|