bbcodelib 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +6 -0
- data/Manifest.txt +21 -0
- data/README.rdoc +39 -0
- data/Rakefile +34 -0
- data/bbcodelib.gemspec +34 -0
- data/ext/Makefile +150 -0
- data/ext/bbcode.cpp +37 -0
- data/ext/bbcode_config.h +179 -0
- data/ext/bbcode_lexer.cpp +297 -0
- data/ext/bbcode_lexer.h +99 -0
- data/ext/bbcode_parser.cpp +851 -0
- data/ext/bbcode_parser.h +159 -0
- data/ext/bbcode_utils.cpp +143 -0
- data/ext/bbcode_utils.h +144 -0
- data/ext/extconf.rb +13 -0
- data/lib/bbcodelib.rb +6 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_bbcodelib.rb +11 -0
- data/test/test_helper.rb +3 -0
- metadata +89 -0
@@ -0,0 +1,297 @@
|
|
1
|
+
#include "bbcode_lexer.h"
|
2
|
+
|
3
|
+
namespace bbcode{
|
4
|
+
// Lexer: state-transition table of the tokenizer used by lexer::get_lexeme().
// Row    = current state (C1..C16, in enum order).
// Column = class of the character just read, or one of two per-state flags:
//   0 CHAR (any character without its own column), 1 ENTER, 2 CR_RETURN, 3 COLON,
//   4 SEMICOLON (get_lexeme() currently never selects this column), 5 QUOTE,
//   6 SPACEBAR, 7 not selected by get_lexeme(),
//   8 flag: T if reaching this state completes a lexeme, NT otherwise,
//   9 flag: YES if the character that led here must be pushed back and re-read,
//   10 EQUALITY, 11 LEFT_SQUARE_BRACKET, 12 RIGHT_SQUARE_BRACKET, 13 SLASH, 14 MULT.
// Rows are declared 17 wide but only 15 initializers are given, so the last two
// entries of every row are zero-initialized (numerically equal to C1).
// NOTE(review): in column 14 row C10 holds C1 while every other delimiter column of
// that row holds C11, and row C11 holds C11 there - the entry looks like it landed
// one row too low, which would swallow "<spaces>*" without emitting SPACE; confirm.
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
|
5
|
+
// CHAR ENTER CR_RET COL SEMICOL QUOTE SP EQAL TERM PUSHBACK EQUALITY [ ] / *
|
6
|
+
enum conds Lexer[][17]={/*C1*/ {C2, C4, C1, C6, C7, C8, C10, C1, NT, NO, C12, C13, C14, C15, C16 }, //initial state: dispatch on the character class
|
7
|
+
/*C2*/ {C2, C3, C3, C3, C3, C3, C3, C1, NT, NO, C3, C3, C3, C3, C3 }, //inside a WORD: stay on CHAR, any delimiter ends it
|
8
|
+
/*C3*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, YES, C1, C1, C1, C1, C1 }, //we found WORD, pushback
|
9
|
+
/*C4*/ {C2, C1, C1, C6, C7, C8, C1, C1, NT, NO, C12, C1, C1, C1, C1 }, //after ENTER; a following ENTER/CR_RETURN returns to C1 without completing a lexeme
|
10
|
+
/*C5*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //EMPTY (no row transitions into this state)
|
11
|
+
/*C6*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found COLON
|
12
|
+
/*C7*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found SEMICOLON
|
13
|
+
/*C8*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found QUOTE
|
14
|
+
/*C9*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //NOT USED
|
15
|
+
/*C10*/ {C11, C11, C11, C11, C11, C11, C10, C11, NT, NO, C11, C11, C11, C11, C1 }, //inside a run of spaces: stay on SPACEBAR, see NOTE(review) above for column 14
|
16
|
+
/*C11*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, YES, C1, C1, C1, C1, C11 }, //we found SPACE, pushback
|
17
|
+
/*C12*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found EQUALITY
|
18
|
+
/*C13*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found LEFT_SQUARE_BRACKET
|
19
|
+
/*C14*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found RIGHT_SQUARE_BRACKET
|
20
|
+
/*C15*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found SLASH
|
21
|
+
/*C16*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 } //we found MULT
|
22
|
+
};
|
23
|
+
//--------------------------------
|
24
|
+
// lexer::get_lexeme_str, return current lexeme
|
25
|
+
string lexer::get_lexeme_str(void){
|
26
|
+
return _strbuf_copy;
|
27
|
+
}
|
28
|
+
//--------------------------------
|
29
|
+
// lexer::init, perform initialization
|
30
|
+
void lexer::init(const parser_config& pconf){
|
31
|
+
_pconf=pconf;
|
32
|
+
//mak� sure the buffer is empty
|
33
|
+
if(_buffer != NULL){
|
34
|
+
delete [] _buffer;
|
35
|
+
_buffer = NULL;
|
36
|
+
}
|
37
|
+
_buffer = new char[_pconf._read_buffer_size]; /* allocating memory for buffer */
|
38
|
+
_tot_characters_read = 0;
|
39
|
+
_read_chars_num = 0;
|
40
|
+
_current_state = C1; /* setting machine to initial state C1 */
|
41
|
+
_strbuf.clear();
|
42
|
+
_strbuf_copy.clear();
|
43
|
+
_upper_str.clear();
|
44
|
+
/* we need this to make sure the trigger won't work in case _DEFAULT_MAX_MULTIPART_FORM_INTERNAL_PARAM_LEN < _pconf._read_buffer_size*/
|
45
|
+
_i = 0;
|
46
|
+
}
|
47
|
+
|
48
|
+
//--------------------------------
|
49
|
+
// lexer::get_lexeme
|
50
|
+
|
51
|
+
lexeme lexer::get_lexeme(void){
|
52
|
+
|
53
|
+
do{ /* we will roatate here till CIN isn't over */
|
54
|
+
|
55
|
+
if(_i == _read_chars_num){
|
56
|
+
_pconf._source_stream->read(_buffer, static_cast<streamsize>(_pconf._read_buffer_size));
|
57
|
+
_read_chars_num = static_cast<size_t>(_pconf._source_stream->gcount());
|
58
|
+
if(!_read_chars_num){
|
59
|
+
|
60
|
+
if(_current_state == C2){
|
61
|
+
_strbuf_copy = _strbuf;
|
62
|
+
_strbuf.clear();
|
63
|
+
_current_state = C1;
|
64
|
+
_i = 0; //in case we have just one word, and there is nothing more, we return word and preparing for the next TERM
|
65
|
+
return WORD;
|
66
|
+
}
|
67
|
+
|
68
|
+
return TERM; /* end of stream reached*/
|
69
|
+
}
|
70
|
+
//if(_tot_characters_read + _read_chars_num > _pconf._max_content_length) /* we have reached max_content_length limitation */
|
71
|
+
// throw out_of_range(get_module_msg("max content length reached (") + stream_cast<string>(_pconf._max_content_length) + ")");
|
72
|
+
_tot_characters_read += _read_chars_num; /* increment read character counter */
|
73
|
+
_i = 0;
|
74
|
+
}
|
75
|
+
|
76
|
+
for(; _i < _read_chars_num; _i++){
|
77
|
+
|
78
|
+
//_strbuf.push_back(_buffer[_i]);
|
79
|
+
_strbuf.append(1, _buffer[_i]);
|
80
|
+
|
81
|
+
switch(_buffer[_i]){
|
82
|
+
|
83
|
+
case COLON :
|
84
|
+
_current_state = Lexer[_current_state][3];
|
85
|
+
break;
|
86
|
+
//case SEMICOLON :
|
87
|
+
// _current_state = Lexer[_current_state][4];
|
88
|
+
// break;
|
89
|
+
case QUOTE :
|
90
|
+
_current_state = Lexer[_current_state][5];
|
91
|
+
break;
|
92
|
+
case ENTER :
|
93
|
+
_current_state = Lexer[_current_state][1];
|
94
|
+
break;
|
95
|
+
case CR_RETURN :
|
96
|
+
_current_state = Lexer[_current_state][2];
|
97
|
+
break;
|
98
|
+
case SPACEBAR :
|
99
|
+
_current_state = Lexer[_current_state][6];
|
100
|
+
break;
|
101
|
+
case EQUALITY :
|
102
|
+
_current_state = Lexer[_current_state][10];
|
103
|
+
break;
|
104
|
+
case LEFT_SQUARE_BRACKET:
|
105
|
+
_current_state = Lexer[_current_state][11];
|
106
|
+
break;
|
107
|
+
case RIGHT_SQUARE_BRACKET:
|
108
|
+
_current_state = Lexer[_current_state][12];
|
109
|
+
break;
|
110
|
+
case SLASH:
|
111
|
+
_current_state = Lexer[_current_state][13];
|
112
|
+
break;
|
113
|
+
case MULT:
|
114
|
+
_current_state = Lexer[_current_state][14];
|
115
|
+
break;
|
116
|
+
|
117
|
+
default:
|
118
|
+
_current_state = Lexer[_current_state][0];
|
119
|
+
break;
|
120
|
+
}
|
121
|
+
bool _was_erased = false;
|
122
|
+
/* now, check out if we're in Terminate state */
|
123
|
+
if(Lexer[_current_state][8] == T && Lexer[_current_state][9] == YES){
|
124
|
+
//pushing back one symbol
|
125
|
+
//take you attention that we just not have to decrese _i, (due to implementation)
|
126
|
+
if(int(_strbuf.length()-1) >= 0){
|
127
|
+
_strbuf.erase(_strbuf.length()-1);
|
128
|
+
_was_erased = true;
|
129
|
+
}
|
130
|
+
}
|
131
|
+
|
132
|
+
|
133
|
+
|
134
|
+
if(Lexer[_current_state][8] == T && Lexer[_current_state][9] == NO && _i + 1 <= _read_chars_num){
|
135
|
+
_i++;
|
136
|
+
}
|
137
|
+
|
138
|
+
switch(_current_state){
|
139
|
+
case C3: /* we found WORD */
|
140
|
+
// That it is not the proper place for this hack
|
141
|
+
// but it make parser simplier
|
142
|
+
_strbuf_copy = _strbuf;
|
143
|
+
_strbuf.clear();
|
144
|
+
_current_state = C1;
|
145
|
+
switch(_strbuf_copy.length()){
|
146
|
+
case 1:
|
147
|
+
_upper_str = upper_case(_strbuf_copy);
|
148
|
+
if(_upper_str == _BOLD)
|
149
|
+
return BOLD;
|
150
|
+
if(_upper_str == _ITALIC)
|
151
|
+
return ITALIC;
|
152
|
+
if(_upper_str == _UNDERLINE)
|
153
|
+
return UNDERLINE;
|
154
|
+
if(_upper_str == _HEADER)
|
155
|
+
return HEADER;
|
156
|
+
if(_upper_str == _SMALL)
|
157
|
+
return SMALL;
|
158
|
+
if(_upper_str == _OFFTOPIC)
|
159
|
+
return OFFTOPIC;
|
160
|
+
if(_upper_str == _Q)
|
161
|
+
return QQUOTE;
|
162
|
+
if(_upper_str == _MULT)
|
163
|
+
return MULT;
|
164
|
+
break;
|
165
|
+
case 2:
|
166
|
+
_upper_str = upper_case(_strbuf_copy);
|
167
|
+
if(_upper_str == _HR)
|
168
|
+
return HR;
|
169
|
+
break;
|
170
|
+
|
171
|
+
case 3:
|
172
|
+
_upper_str = upper_case(_strbuf_copy);
|
173
|
+
if(_upper_str == _SUP)
|
174
|
+
return SUP;
|
175
|
+
if(_upper_str == _SUB)
|
176
|
+
return SUB;
|
177
|
+
if(_upper_str == _RED)
|
178
|
+
return RED;
|
179
|
+
if(_upper_str == _URL)
|
180
|
+
return URL;
|
181
|
+
if(_upper_str == _IMG)
|
182
|
+
return IMG;
|
183
|
+
if(_upper_str == _PRE)
|
184
|
+
return PRE;
|
185
|
+
if(_upper_str == _RED)
|
186
|
+
return RED;
|
187
|
+
break;
|
188
|
+
case 4:
|
189
|
+
_upper_str = upper_case(_strbuf_copy);
|
190
|
+
if(_upper_str == _SIZE)
|
191
|
+
return SIZE;
|
192
|
+
if(_upper_str == _FACE)
|
193
|
+
return FACE;
|
194
|
+
if(_upper_str == _LIST)
|
195
|
+
return LIST;
|
196
|
+
if(_upper_str == _CODE)
|
197
|
+
return CODE;
|
198
|
+
if(_upper_str == _QUOTE)
|
199
|
+
return QQUOTE;
|
200
|
+
if(_upper_str == _BLUE)
|
201
|
+
return BLUE;
|
202
|
+
break;
|
203
|
+
case 5:
|
204
|
+
_upper_str = upper_case(_strbuf_copy);
|
205
|
+
if(_upper_str == _EMAIL)
|
206
|
+
return EMAIL;
|
207
|
+
if(_upper_str == _QUOTE)
|
208
|
+
return QQUOTE;
|
209
|
+
if(_upper_str == _COLOR)
|
210
|
+
return COLOR;
|
211
|
+
if(_upper_str == _GREEN)
|
212
|
+
return GREEN;
|
213
|
+
if(_upper_str == _WHITE)
|
214
|
+
return WHITE;
|
215
|
+
if(_upper_str == _BLACK)
|
216
|
+
return BLACK;
|
217
|
+
if(_upper_str == _OLIST)
|
218
|
+
return OLIST;
|
219
|
+
case 6:
|
220
|
+
_upper_str = upper_case(_strbuf_copy);
|
221
|
+
if(_upper_str == _ORANGE)
|
222
|
+
return ORANGE;
|
223
|
+
if(_upper_str == _PURPLE)
|
224
|
+
return PURPLE;
|
225
|
+
if(_upper_str == _YELLOW)
|
226
|
+
return YELLOW;
|
227
|
+
|
228
|
+
};
|
229
|
+
return WORD;
|
230
|
+
|
231
|
+
|
232
|
+
case C6: /* we found COLON */
|
233
|
+
_strbuf_copy = _strbuf;
|
234
|
+
_strbuf.clear();
|
235
|
+
_current_state = C1;
|
236
|
+
return COLON;
|
237
|
+
|
238
|
+
case C7: /* we found SEMICOLON */
|
239
|
+
_strbuf_copy = _strbuf;
|
240
|
+
_strbuf.clear();
|
241
|
+
_current_state = C1;
|
242
|
+
return SEMICOLON;
|
243
|
+
|
244
|
+
case C8: /* we found QUOTE */
|
245
|
+
_strbuf_copy = _strbuf;
|
246
|
+
_strbuf.clear();
|
247
|
+
_current_state = C1;
|
248
|
+
return QUOTE;
|
249
|
+
|
250
|
+
case C11: /* we found SPACE */
|
251
|
+
_strbuf_copy = _strbuf;
|
252
|
+
_strbuf.clear();
|
253
|
+
_current_state = C1;
|
254
|
+
return SPACE;
|
255
|
+
|
256
|
+
case C12: /* we found EQUALITY */
|
257
|
+
_strbuf_copy = _strbuf;
|
258
|
+
_strbuf.clear();
|
259
|
+
_current_state = C1;
|
260
|
+
return EQUALITY;
|
261
|
+
|
262
|
+
case C13: /* we found LEFT_SQUARE_BRACKET */
|
263
|
+
_strbuf_copy = _strbuf;
|
264
|
+
_strbuf.clear();
|
265
|
+
_current_state = C1;
|
266
|
+
return LEFT_SQUARE_BRACKET;
|
267
|
+
|
268
|
+
case C14: /* we found RIGHT_SQUARE_BRACKET */
|
269
|
+
_strbuf_copy = _strbuf;
|
270
|
+
_strbuf.clear();
|
271
|
+
_current_state = C1;
|
272
|
+
return RIGHT_SQUARE_BRACKET;
|
273
|
+
|
274
|
+
case C15: /* we found SLASH */
|
275
|
+
_strbuf_copy = _strbuf;
|
276
|
+
_strbuf.clear();
|
277
|
+
_current_state = C1;
|
278
|
+
return SLASH;
|
279
|
+
|
280
|
+
case C16: /* we found MULT */
|
281
|
+
_strbuf_copy = _strbuf;
|
282
|
+
_strbuf.clear();
|
283
|
+
_current_state = C1;
|
284
|
+
return MULT;
|
285
|
+
|
286
|
+
default :
|
287
|
+
break;
|
288
|
+
}
|
289
|
+
|
290
|
+
}
|
291
|
+
|
292
|
+
}while(_read_chars_num <= _pconf._read_buffer_size);
|
293
|
+
|
294
|
+
return TERM;
|
295
|
+
}
|
296
|
+
|
297
|
+
}
|
data/ext/bbcode_lexer.h
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
/*!
|
2
|
+
Abstract:
|
3
|
+
this file is a part of bbcode library
|
4
|
+
|
5
|
+
Author:
|
6
|
+
Igor Franchuk (sprog@online.ru)
|
7
|
+
|
8
|
+
Last Update:
|
9
|
+
$Id: bbcode_lexer.h,v 1.1 2007/12/19 19:13:30 lanthruster Exp $
|
10
|
+
Version: 0.01
|
11
|
+
*/
|
12
|
+
|
13
|
+
#ifndef BBCODE_LEXER_H
|
14
|
+
#define BBCODE_LEXER_H
|
15
|
+
#pragma once
|
16
|
+
|
17
|
+
#include <string>
|
18
|
+
#include <iostream>
|
19
|
+
#include <istream>
|
20
|
+
#include <stdexcept>
|
21
|
+
|
22
|
+
#include "bbcode_config.h"
|
23
|
+
#include "bbcode_utils.h"
|
24
|
+
|
25
|
+
#pragma warning(disable: 4290) //VS doesn't support exception specifications
|
26
|
+
|
27
|
+
namespace{
|
28
|
+
|
29
|
+
using std::string;
|
30
|
+
using std::istream;
|
31
|
+
using std::cin;
|
32
|
+
using std::out_of_range;
|
33
|
+
using std::length_error;
|
34
|
+
using std::streamsize;
|
35
|
+
|
36
|
+
}
|
37
|
+
|
38
|
+
namespace bbcode{
|
39
|
+
|
40
|
+
//symbols
|
41
|
+
// T is TERMINATOR, NT = NOT TERMINATOR
|
42
|
+
// CHAR = anything but (COLON, SEMICOLON, QUOTE, ENTER, CR_RETURN, SPACE, EQUALITY)
|
43
|
+
// enum symbols{CHAR, COLON = ':', SEMICOLON = ';', QUOTE = '"', ENTER = '\r', SPACEBAR = ' ', CR_RETURN = '\n'};
|
44
|
+
|
45
|
+
//EM states
// conds holds the state identifiers of the lexer state machine plus a few flag
// constants that share the same enum:
//   C1..C130 - states, numbered from 0 (C1 == 0). There is no C128 enumerator, so
//              C129 directly follows C127 (C129 == 127, C130 == 128).
//   END      - enumerator following C130 (== 129).
//   T / NT   - terminal / not-terminal flag (1 / 0).
//   YES / NO - generic flag values (1 / 0).
//   PB       - 2.
// The flag names are aliases of state values (NT == NO == C1, T == YES == C2,
// PB == C3), so a flag and a state cannot be told apart by value alone.
|
46
|
+
enum conds{C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11, C12, C13, C14, C15, C16, C17, C18, C19,
|
47
|
+
C20, C21, C22, C23, C24, C25, C26, C27, C28, C29, C30,
|
48
|
+
C31, C32, C33, C34, C35, C36, C37, C38, C39, C40, C41,
|
49
|
+
C42, C43, C44, C45, C46, C47, C48, C49, C50, C51, C52,
|
50
|
+
C53, C54, C55, C56, C57, C58, C59, C60, C61, C62, C63,
|
51
|
+
C64, C65, C66, C67, C68, C69, C70, C71, C72, C73, C74,
|
52
|
+
C75, C76, C77, C78, C79, C80, C81, C82, C83, C84, C85,
|
53
|
+
C86, C87, C88, C89, C90, C91, C92, C93, C94, C95, C96,
|
54
|
+
C97, C98, C99,C100,C101,C102,C103,C104,C105,C106,C107,
|
55
|
+
C108,C109,C110,C111,C112,C113,C114,C115,C116,C117,C118,
|
56
|
+
C119,C120,C121,C122,C123,C124,C125,C126,C127,C129,C130,
|
57
|
+
END, T=1, NT=0, PB=2, YES=1, NO=0};
|
58
|
+
|
59
|
+
|
60
|
+
class lexer{
|
61
|
+
public:
|
62
|
+
enum flag{ON=1, OFF=0};
|
63
|
+
private:
|
64
|
+
parser_config _pconf;
|
65
|
+
char* _buffer;
|
66
|
+
size_t _tot_characters_read;
|
67
|
+
conds _current_state;
|
68
|
+
string _strbuf;
|
69
|
+
string _strbuf_copy;
|
70
|
+
|
71
|
+
string _upper_str;
|
72
|
+
|
73
|
+
size_t _i;
|
74
|
+
size_t _read_chars_num;
|
75
|
+
|
76
|
+
public:
|
77
|
+
|
78
|
+
|
79
|
+
|
80
|
+
/* class initializer */
|
81
|
+
void init(const parser_config& pconf);
|
82
|
+
|
83
|
+
lexeme get_lexeme(void);
|
84
|
+
string get_lexeme_str(void);
|
85
|
+
|
86
|
+
/* constructor section */
|
87
|
+
lexer() :
|
88
|
+
_tot_characters_read(0),
|
89
|
+
_buffer(NULL)
|
90
|
+
{}
|
91
|
+
|
92
|
+
~lexer() { if(_buffer != NULL){ delete[] _buffer; _buffer = NULL; } }
|
93
|
+
|
94
|
+
};
|
95
|
+
|
96
|
+
|
97
|
+
}
|
98
|
+
|
99
|
+
#endif
|