bbcodelib 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,297 @@
1
+ #include "bbcode_lexer.h"
2
+
3
+ namespace bbcode{
4
+ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
5
+ // CHAR ENTER CR_RET COL SEMICOL QUOTE SP EQAL PUBACK EQUALITY [ ] / *
6
+ enum conds Lexer[][17]={/*C1*/ {C2, C4, C1, C6, C7, C8, C10, C1, NT, NO, C12, C13, C14, C15, C16 }, //-
7
+ /*C2*/ {C2, C3, C3, C3, C3, C3, C3, C1, NT, NO, C3, C3, C3, C3, C3 }, //---263422640625425\r
8
+ /*C3*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, YES, C1, C1, C1, C1, C1 }, //we found WORD, pushback
9
+ /*C4*/ {C2, C1, C1, C6, C7, C8, C1, C1, NT, NO, C12, C1, C1, C1, C1 }, //\r\n
10
+ /*C5*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //EMPTY
11
+ /*C6*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found COLON
12
+ /*C7*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found SEMICOLON
13
+ /*C8*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found QUOTE
14
+ /*C9*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //NOT USED
15
+ /*C10*/ {C11, C11, C11, C11, C11, C11, C10, C11, NT, NO, C11, C11, C11, C11, C1 }, //we're missing spaces
16
+ /*C11*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, YES, C1, C1, C1, C1, C11 }, //we found SPACE
17
+ /*C12*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found SPACE
18
+ /*C13*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found LEFT_SQUARE_BRACKET
19
+ /*C14*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found RIGHT_SQUARE_BRACKET
20
+ /*C15*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found SLASH
21
+ /*C16*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 } //we found MULTIPLIER
22
+ };
23
+ //--------------------------------
24
+ // lexer::get_lexeme_str, return current lexeme
25
+ string lexer::get_lexeme_str(void){
26
+ return _strbuf_copy;
27
+ }
28
+ //--------------------------------
29
+ // lexer::init, perform initialization
30
+ void lexer::init(const parser_config& pconf){
31
+ _pconf=pconf;
32
+ //mak� sure the buffer is empty
33
+ if(_buffer != NULL){
34
+ delete [] _buffer;
35
+ _buffer = NULL;
36
+ }
37
+ _buffer = new char[_pconf._read_buffer_size]; /* allocating memory for buffer */
38
+ _tot_characters_read = 0;
39
+ _read_chars_num = 0;
40
+ _current_state = C1; /* setting machine to initial state C1 */
41
+ _strbuf.clear();
42
+ _strbuf_copy.clear();
43
+ _upper_str.clear();
44
+ /* we need this to make sure the trigger won't work in case _DEFAULT_MAX_MULTIPART_FORM_INTERNAL_PARAM_LEN < _pconf._read_buffer_size*/
45
+ _i = 0;
46
+ }
47
+
48
+ //--------------------------------
49
+ // lexer::get_lexeme
50
+
51
+ lexeme lexer::get_lexeme(void){
52
+
53
+ do{ /* we will roatate here till CIN isn't over */
54
+
55
+ if(_i == _read_chars_num){
56
+ _pconf._source_stream->read(_buffer, static_cast<streamsize>(_pconf._read_buffer_size));
57
+ _read_chars_num = static_cast<size_t>(_pconf._source_stream->gcount());
58
+ if(!_read_chars_num){
59
+
60
+ if(_current_state == C2){
61
+ _strbuf_copy = _strbuf;
62
+ _strbuf.clear();
63
+ _current_state = C1;
64
+ _i = 0; //in case we have just one word, and there is nothing more, we return word and preparing for the next TERM
65
+ return WORD;
66
+ }
67
+
68
+ return TERM; /* end of stream reached*/
69
+ }
70
+ //if(_tot_characters_read + _read_chars_num > _pconf._max_content_length) /* we have reached max_content_length limitation */
71
+ // throw out_of_range(get_module_msg("max content length reached (") + stream_cast<string>(_pconf._max_content_length) + ")");
72
+ _tot_characters_read += _read_chars_num; /* increment read character counter */
73
+ _i = 0;
74
+ }
75
+
76
+ for(; _i < _read_chars_num; _i++){
77
+
78
+ //_strbuf.push_back(_buffer[_i]);
79
+ _strbuf.append(1, _buffer[_i]);
80
+
81
+ switch(_buffer[_i]){
82
+
83
+ case COLON :
84
+ _current_state = Lexer[_current_state][3];
85
+ break;
86
+ //case SEMICOLON :
87
+ // _current_state = Lexer[_current_state][4];
88
+ // break;
89
+ case QUOTE :
90
+ _current_state = Lexer[_current_state][5];
91
+ break;
92
+ case ENTER :
93
+ _current_state = Lexer[_current_state][1];
94
+ break;
95
+ case CR_RETURN :
96
+ _current_state = Lexer[_current_state][2];
97
+ break;
98
+ case SPACEBAR :
99
+ _current_state = Lexer[_current_state][6];
100
+ break;
101
+ case EQUALITY :
102
+ _current_state = Lexer[_current_state][10];
103
+ break;
104
+ case LEFT_SQUARE_BRACKET:
105
+ _current_state = Lexer[_current_state][11];
106
+ break;
107
+ case RIGHT_SQUARE_BRACKET:
108
+ _current_state = Lexer[_current_state][12];
109
+ break;
110
+ case SLASH:
111
+ _current_state = Lexer[_current_state][13];
112
+ break;
113
+ case MULT:
114
+ _current_state = Lexer[_current_state][14];
115
+ break;
116
+
117
+ default:
118
+ _current_state = Lexer[_current_state][0];
119
+ break;
120
+ }
121
+ bool _was_erased = false;
122
+ /* now, check out if we're in Terminate state */
123
+ if(Lexer[_current_state][8] == T && Lexer[_current_state][9] == YES){
124
+ //pushing back one symbol
125
+ //take you attention that we just not have to decrese _i, (due to implementation)
126
+ if(int(_strbuf.length()-1) >= 0){
127
+ _strbuf.erase(_strbuf.length()-1);
128
+ _was_erased = true;
129
+ }
130
+ }
131
+
132
+
133
+
134
+ if(Lexer[_current_state][8] == T && Lexer[_current_state][9] == NO && _i + 1 <= _read_chars_num){
135
+ _i++;
136
+ }
137
+
138
+ switch(_current_state){
139
+ case C3: /* we found WORD */
140
+ // That it is not the proper place for this hack
141
+ // but it make parser simplier
142
+ _strbuf_copy = _strbuf;
143
+ _strbuf.clear();
144
+ _current_state = C1;
145
+ switch(_strbuf_copy.length()){
146
+ case 1:
147
+ _upper_str = upper_case(_strbuf_copy);
148
+ if(_upper_str == _BOLD)
149
+ return BOLD;
150
+ if(_upper_str == _ITALIC)
151
+ return ITALIC;
152
+ if(_upper_str == _UNDERLINE)
153
+ return UNDERLINE;
154
+ if(_upper_str == _HEADER)
155
+ return HEADER;
156
+ if(_upper_str == _SMALL)
157
+ return SMALL;
158
+ if(_upper_str == _OFFTOPIC)
159
+ return OFFTOPIC;
160
+ if(_upper_str == _Q)
161
+ return QQUOTE;
162
+ if(_upper_str == _MULT)
163
+ return MULT;
164
+ break;
165
+ case 2:
166
+ _upper_str = upper_case(_strbuf_copy);
167
+ if(_upper_str == _HR)
168
+ return HR;
169
+ break;
170
+
171
+ case 3:
172
+ _upper_str = upper_case(_strbuf_copy);
173
+ if(_upper_str == _SUP)
174
+ return SUP;
175
+ if(_upper_str == _SUB)
176
+ return SUB;
177
+ if(_upper_str == _RED)
178
+ return RED;
179
+ if(_upper_str == _URL)
180
+ return URL;
181
+ if(_upper_str == _IMG)
182
+ return IMG;
183
+ if(_upper_str == _PRE)
184
+ return PRE;
185
+ if(_upper_str == _RED)
186
+ return RED;
187
+ break;
188
+ case 4:
189
+ _upper_str = upper_case(_strbuf_copy);
190
+ if(_upper_str == _SIZE)
191
+ return SIZE;
192
+ if(_upper_str == _FACE)
193
+ return FACE;
194
+ if(_upper_str == _LIST)
195
+ return LIST;
196
+ if(_upper_str == _CODE)
197
+ return CODE;
198
+ if(_upper_str == _QUOTE)
199
+ return QQUOTE;
200
+ if(_upper_str == _BLUE)
201
+ return BLUE;
202
+ break;
203
+ case 5:
204
+ _upper_str = upper_case(_strbuf_copy);
205
+ if(_upper_str == _EMAIL)
206
+ return EMAIL;
207
+ if(_upper_str == _QUOTE)
208
+ return QQUOTE;
209
+ if(_upper_str == _COLOR)
210
+ return COLOR;
211
+ if(_upper_str == _GREEN)
212
+ return GREEN;
213
+ if(_upper_str == _WHITE)
214
+ return WHITE;
215
+ if(_upper_str == _BLACK)
216
+ return BLACK;
217
+ if(_upper_str == _OLIST)
218
+ return OLIST;
219
+ case 6:
220
+ _upper_str = upper_case(_strbuf_copy);
221
+ if(_upper_str == _ORANGE)
222
+ return ORANGE;
223
+ if(_upper_str == _PURPLE)
224
+ return PURPLE;
225
+ if(_upper_str == _YELLOW)
226
+ return YELLOW;
227
+
228
+ };
229
+ return WORD;
230
+
231
+
232
+ case C6: /* we found COLON */
233
+ _strbuf_copy = _strbuf;
234
+ _strbuf.clear();
235
+ _current_state = C1;
236
+ return COLON;
237
+
238
+ case C7: /* we found SEMICOLON */
239
+ _strbuf_copy = _strbuf;
240
+ _strbuf.clear();
241
+ _current_state = C1;
242
+ return SEMICOLON;
243
+
244
+ case C8: /* we found QUOTE */
245
+ _strbuf_copy = _strbuf;
246
+ _strbuf.clear();
247
+ _current_state = C1;
248
+ return QUOTE;
249
+
250
+ case C11: /* we found SPACE */
251
+ _strbuf_copy = _strbuf;
252
+ _strbuf.clear();
253
+ _current_state = C1;
254
+ return SPACE;
255
+
256
+ case C12: /* we found EQUALITY */
257
+ _strbuf_copy = _strbuf;
258
+ _strbuf.clear();
259
+ _current_state = C1;
260
+ return EQUALITY;
261
+
262
+ case C13: /* we found LEFT_SQUARE_BRACKET */
263
+ _strbuf_copy = _strbuf;
264
+ _strbuf.clear();
265
+ _current_state = C1;
266
+ return LEFT_SQUARE_BRACKET;
267
+
268
+ case C14: /* we found RIGHT_SQUARE_BRACKET */
269
+ _strbuf_copy = _strbuf;
270
+ _strbuf.clear();
271
+ _current_state = C1;
272
+ return RIGHT_SQUARE_BRACKET;
273
+
274
+ case C15: /* we found SLASH */
275
+ _strbuf_copy = _strbuf;
276
+ _strbuf.clear();
277
+ _current_state = C1;
278
+ return SLASH;
279
+
280
+ case C16: /* we found MULT */
281
+ _strbuf_copy = _strbuf;
282
+ _strbuf.clear();
283
+ _current_state = C1;
284
+ return MULT;
285
+
286
+ default :
287
+ break;
288
+ }
289
+
290
+ }
291
+
292
+ }while(_read_chars_num <= _pconf._read_buffer_size);
293
+
294
+ return TERM;
295
+ }
296
+
297
+ }
@@ -0,0 +1,99 @@
1
+ /*!
2
+ Abstract:
3
+ this file is a part of bbcode library
4
+
5
+ Author:
6
+ Igor Franchuk (sprog@online.ru)
7
+
8
+ Last Update:
9
+ $Id: bbcode_lexer.h,v 1.1 2007/12/19 19:13:30 lanthruster Exp $
10
+ Version: 0.01
11
+ */
12
+
13
+ #ifndef BBCODE_LEXER_H
14
+ #define BBCODE_LEXER_H
15
+ #pragma once
16
+
17
+ #include <string>
18
+ #include <iostream>
19
+ #include <istream>
20
+ #include <stdexcept>
21
+
22
+ #include "bbcode_config.h"
23
+ #include "bbcode_utils.h"
24
+
25
+ #pragma warning(disable: 4290) //VS doesn't support exception specifications
26
+
27
// Standard library names used unqualified by the lexer sources.
// NOTE(review): this anonymous namespace lives in a header, so every file that
// includes it gets these names at global scope. It is kept as is because other
// project files may rely on that; consider moving the using-declarations into
// namespace bbcode once all users have been checked.
namespace{

    using std::string;
    using std::istream;
    using std::cin;
    using std::out_of_range;
    using std::length_error;
    using std::streamsize;

}
37
+
38
+ namespace bbcode{
39
+
40
+ //symbols
41
+ // T is TERMINATOR, NT = NOT TERMINATOR
42
+ // CHAR = anything but (COLON, SEMICOLON, QUOTE, ENTER, CR_RETURN, SPACE, EQUALITY)
43
+ // enum symbols{CHAR, COLON = ':', SEMICOLON = ';', QUOTE = '"', ENTER = '\r', SPACEBAR = ' ', CR_RETURN = '\n'};
44
+
45
+ //EM states
46
// States of the lexer state machine, plus the flag values stored in the same
// transition table. C1 has the value 0 and the states count up from there.
// The flags are aliases of the first states: NT == NO == C1 (0),
// T == YES == C2 (1), PB == C3 (2).
// NOTE(review): there is no C128 enumerator (C127 is followed by C129). It
// looks like an omission, but adding it would change the values of C129, C130
// and END, so the list is left unchanged.
enum conds{C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11, C12, C13, C14, C15, C16, C17, C18, C19,
           C20, C21, C22, C23, C24, C25, C26, C27, C28, C29, C30,
           C31, C32, C33, C34, C35, C36, C37, C38, C39, C40, C41,
           C42, C43, C44, C45, C46, C47, C48, C49, C50, C51, C52,
           C53, C54, C55, C56, C57, C58, C59, C60, C61, C62, C63,
           C64, C65, C66, C67, C68, C69, C70, C71, C72, C73, C74,
           C75, C76, C77, C78, C79, C80, C81, C82, C83, C84, C85,
           C86, C87, C88, C89, C90, C91, C92, C93, C94, C95, C96,
           C97, C98, C99,C100,C101,C102,C103,C104,C105,C106,C107,
           C108,C109,C110,C111,C112,C113,C114,C115,C116,C117,C118,
           C119,C120,C121,C122,C123,C124,C125,C126,C127,C129,C130,
           END, T=1, NT=0, PB=2, YES=1, NO=0};
58
+
59
+
60
+ class lexer{
61
+ public:
62
+ enum flag{ON=1, OFF=0};
63
+ private:
64
+ parser_config _pconf;
65
+ char* _buffer;
66
+ size_t _tot_characters_read;
67
+ conds _current_state;
68
+ string _strbuf;
69
+ string _strbuf_copy;
70
+
71
+ string _upper_str;
72
+
73
+ size_t _i;
74
+ size_t _read_chars_num;
75
+
76
+ public:
77
+
78
+
79
+
80
+ /* class initializer */
81
+ void init(const parser_config& pconf);
82
+
83
+ lexeme get_lexeme(void);
84
+ string get_lexeme_str(void);
85
+
86
+ /* constructor section */
87
+ lexer() :
88
+ _tot_characters_read(0),
89
+ _buffer(NULL)
90
+ {}
91
+
92
+ ~lexer() { if(_buffer != NULL){ delete[] _buffer; _buffer = NULL; } }
93
+
94
+ };
95
+
96
+
97
+ }
98
+
99
+ #endif