bbcodelib 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,297 @@
1
+ #include "bbcode_lexer.h"
2
+
3
+ namespace bbcode{
4
+ // Transition table, indexed as Lexer[current state][column]. Columns 0-7 and 10-14 select the next state for an input class; column 8 is the terminal flag (T/NT) and column 9 the push-back flag (YES/NO). Rows are declared 17 wide but only 15 cells are listed, so cells 15 and 16 are zero-initialised (C1).
5
+ // col: 0=CHAR 1=ENTER 2=CR_RETURN 3=COLON 4=SEMICOLON 5=QUOTE 6=SPACEBAR 7=(never indexed by get_lexeme) 8=TERMINAL 9=PUSHBACK 10=EQUALITY 11=[ 12=] 13=/ 14=*
6
+ enum conds Lexer[][17]={/*C1*/ {C2, C4, C1, C6, C7, C8, C10, C1, NT, NO, C12, C13, C14, C15, C16 }, //start state: dispatches on the class of the current character
7
+ /*C2*/ {C2, C3, C3, C3, C3, C3, C3, C1, NT, NO, C3, C3, C3, C3, C3 }, //inside a WORD: CHAR stays here, every delimiter get_lexeme feeds in moves to C3
8
+ /*C3*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, YES, C1, C1, C1, C1, C1 }, //we found WORD, terminal, the delimiter is pushed back
9
+ /*C4*/ {C2, C1, C1, C6, C7, C8, C1, C1, NT, NO, C12, C1, C1, C1, C1 }, //entered from C1 on ENTER. NOTE(review): columns 11-14 return to C1 here although C1 maps them to C13-C16 - confirm this is intended
10
+ /*C5*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //EMPTY (no transition in this table leads here)
11
+ /*C6*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found COLON
12
+ /*C7*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found SEMICOLON (only reachable through column 4, whose case is commented out in get_lexeme)
13
+ /*C8*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found QUOTE
14
+ /*C9*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //NOT USED
15
+ /*C10*/ {C11, C11, C11, C11, C11, C11, C10, C11, NT, NO, C11, C11, C11, C11, C1 }, //skipping a run of spaces. NOTE(review): column 14 ('*') goes to C1 while every other non-space input goes to C11 - looks like it should be C11, confirm
16
+ /*C11*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, YES, C1, C1, C1, C1, C11 }, //we found SPACE, terminal, the character that ended the run is pushed back
17
+ /*C12*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found EQUALITY
18
+ /*C13*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found LEFT_SQUARE_BRACKET
19
+ /*C14*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found RIGHT_SQUARE_BRACKET
20
+ /*C15*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 }, //we found SLASH
21
+ /*C16*/ {C1, C1, C1, C1, C1, C1, C1, C1, T, NO, C1, C1, C1, C1, C1 } //we found MULTIPLIER
22
+ };
23
+ //--------------------------------
24
+ // lexer::get_lexeme_str, return current lexeme
25
+ string lexer::get_lexeme_str(void){
26
+ return _strbuf_copy;
27
+ }
28
+ //--------------------------------
29
+ // lexer::init, perform initialization
30
+ void lexer::init(const parser_config& pconf){
31
+ _pconf=pconf;
32
+ //mak� sure the buffer is empty
33
+ if(_buffer != NULL){
34
+ delete [] _buffer;
35
+ _buffer = NULL;
36
+ }
37
+ _buffer = new char[_pconf._read_buffer_size]; /* allocating memory for buffer */
38
+ _tot_characters_read = 0;
39
+ _read_chars_num = 0;
40
+ _current_state = C1; /* setting machine to initial state C1 */
41
+ _strbuf.clear();
42
+ _strbuf_copy.clear();
43
+ _upper_str.clear();
44
+ /* we need this to make sure the trigger won't work in case _DEFAULT_MAX_MULTIPART_FORM_INTERNAL_PARAM_LEN < _pconf._read_buffer_size*/
45
+ _i = 0;
46
+ }
47
+
48
+ //--------------------------------
49
+ // lexer::get_lexeme
50
+
51
+ lexeme lexer::get_lexeme(void){
52
+
53
+ do{ /* we will roatate here till CIN isn't over */
54
+
55
+ if(_i == _read_chars_num){
56
+ _pconf._source_stream->read(_buffer, static_cast<streamsize>(_pconf._read_buffer_size));
57
+ _read_chars_num = static_cast<size_t>(_pconf._source_stream->gcount());
58
+ if(!_read_chars_num){
59
+
60
+ if(_current_state == C2){
61
+ _strbuf_copy = _strbuf;
62
+ _strbuf.clear();
63
+ _current_state = C1;
64
+ _i = 0; //in case we have just one word, and there is nothing more, we return word and preparing for the next TERM
65
+ return WORD;
66
+ }
67
+
68
+ return TERM; /* end of stream reached*/
69
+ }
70
+ //if(_tot_characters_read + _read_chars_num > _pconf._max_content_length) /* we have reached max_content_length limitation */
71
+ // throw out_of_range(get_module_msg("max content length reached (") + stream_cast<string>(_pconf._max_content_length) + ")");
72
+ _tot_characters_read += _read_chars_num; /* increment read character counter */
73
+ _i = 0;
74
+ }
75
+
76
+ for(; _i < _read_chars_num; _i++){
77
+
78
+ //_strbuf.push_back(_buffer[_i]);
79
+ _strbuf.append(1, _buffer[_i]);
80
+
81
+ switch(_buffer[_i]){
82
+
83
+ case COLON :
84
+ _current_state = Lexer[_current_state][3];
85
+ break;
86
+ //case SEMICOLON :
87
+ // _current_state = Lexer[_current_state][4];
88
+ // break;
89
+ case QUOTE :
90
+ _current_state = Lexer[_current_state][5];
91
+ break;
92
+ case ENTER :
93
+ _current_state = Lexer[_current_state][1];
94
+ break;
95
+ case CR_RETURN :
96
+ _current_state = Lexer[_current_state][2];
97
+ break;
98
+ case SPACEBAR :
99
+ _current_state = Lexer[_current_state][6];
100
+ break;
101
+ case EQUALITY :
102
+ _current_state = Lexer[_current_state][10];
103
+ break;
104
+ case LEFT_SQUARE_BRACKET:
105
+ _current_state = Lexer[_current_state][11];
106
+ break;
107
+ case RIGHT_SQUARE_BRACKET:
108
+ _current_state = Lexer[_current_state][12];
109
+ break;
110
+ case SLASH:
111
+ _current_state = Lexer[_current_state][13];
112
+ break;
113
+ case MULT:
114
+ _current_state = Lexer[_current_state][14];
115
+ break;
116
+
117
+ default:
118
+ _current_state = Lexer[_current_state][0];
119
+ break;
120
+ }
121
+ bool _was_erased = false;
122
+ /* now, check out if we're in Terminate state */
123
+ if(Lexer[_current_state][8] == T && Lexer[_current_state][9] == YES){
124
+ //pushing back one symbol
125
+ //take you attention that we just not have to decrese _i, (due to implementation)
126
+ if(int(_strbuf.length()-1) >= 0){
127
+ _strbuf.erase(_strbuf.length()-1);
128
+ _was_erased = true;
129
+ }
130
+ }
131
+
132
+
133
+
134
+ if(Lexer[_current_state][8] == T && Lexer[_current_state][9] == NO && _i + 1 <= _read_chars_num){
135
+ _i++;
136
+ }
137
+
138
+ switch(_current_state){
139
+ case C3: /* we found WORD */
140
+ // That it is not the proper place for this hack
141
+ // but it make parser simplier
142
+ _strbuf_copy = _strbuf;
143
+ _strbuf.clear();
144
+ _current_state = C1;
145
+ switch(_strbuf_copy.length()){
146
+ case 1:
147
+ _upper_str = upper_case(_strbuf_copy);
148
+ if(_upper_str == _BOLD)
149
+ return BOLD;
150
+ if(_upper_str == _ITALIC)
151
+ return ITALIC;
152
+ if(_upper_str == _UNDERLINE)
153
+ return UNDERLINE;
154
+ if(_upper_str == _HEADER)
155
+ return HEADER;
156
+ if(_upper_str == _SMALL)
157
+ return SMALL;
158
+ if(_upper_str == _OFFTOPIC)
159
+ return OFFTOPIC;
160
+ if(_upper_str == _Q)
161
+ return QQUOTE;
162
+ if(_upper_str == _MULT)
163
+ return MULT;
164
+ break;
165
+ case 2:
166
+ _upper_str = upper_case(_strbuf_copy);
167
+ if(_upper_str == _HR)
168
+ return HR;
169
+ break;
170
+
171
+ case 3:
172
+ _upper_str = upper_case(_strbuf_copy);
173
+ if(_upper_str == _SUP)
174
+ return SUP;
175
+ if(_upper_str == _SUB)
176
+ return SUB;
177
+ if(_upper_str == _RED)
178
+ return RED;
179
+ if(_upper_str == _URL)
180
+ return URL;
181
+ if(_upper_str == _IMG)
182
+ return IMG;
183
+ if(_upper_str == _PRE)
184
+ return PRE;
185
+ if(_upper_str == _RED)
186
+ return RED;
187
+ break;
188
+ case 4:
189
+ _upper_str = upper_case(_strbuf_copy);
190
+ if(_upper_str == _SIZE)
191
+ return SIZE;
192
+ if(_upper_str == _FACE)
193
+ return FACE;
194
+ if(_upper_str == _LIST)
195
+ return LIST;
196
+ if(_upper_str == _CODE)
197
+ return CODE;
198
+ if(_upper_str == _QUOTE)
199
+ return QQUOTE;
200
+ if(_upper_str == _BLUE)
201
+ return BLUE;
202
+ break;
203
+ case 5:
204
+ _upper_str = upper_case(_strbuf_copy);
205
+ if(_upper_str == _EMAIL)
206
+ return EMAIL;
207
+ if(_upper_str == _QUOTE)
208
+ return QQUOTE;
209
+ if(_upper_str == _COLOR)
210
+ return COLOR;
211
+ if(_upper_str == _GREEN)
212
+ return GREEN;
213
+ if(_upper_str == _WHITE)
214
+ return WHITE;
215
+ if(_upper_str == _BLACK)
216
+ return BLACK;
217
+ if(_upper_str == _OLIST)
218
+ return OLIST;
219
+ case 6:
220
+ _upper_str = upper_case(_strbuf_copy);
221
+ if(_upper_str == _ORANGE)
222
+ return ORANGE;
223
+ if(_upper_str == _PURPLE)
224
+ return PURPLE;
225
+ if(_upper_str == _YELLOW)
226
+ return YELLOW;
227
+
228
+ };
229
+ return WORD;
230
+
231
+
232
+ case C6: /* we found COLON */
233
+ _strbuf_copy = _strbuf;
234
+ _strbuf.clear();
235
+ _current_state = C1;
236
+ return COLON;
237
+
238
+ case C7: /* we found SEMICOLON */
239
+ _strbuf_copy = _strbuf;
240
+ _strbuf.clear();
241
+ _current_state = C1;
242
+ return SEMICOLON;
243
+
244
+ case C8: /* we found QUOTE */
245
+ _strbuf_copy = _strbuf;
246
+ _strbuf.clear();
247
+ _current_state = C1;
248
+ return QUOTE;
249
+
250
+ case C11: /* we found SPACE */
251
+ _strbuf_copy = _strbuf;
252
+ _strbuf.clear();
253
+ _current_state = C1;
254
+ return SPACE;
255
+
256
+ case C12: /* we found EQUALITY */
257
+ _strbuf_copy = _strbuf;
258
+ _strbuf.clear();
259
+ _current_state = C1;
260
+ return EQUALITY;
261
+
262
+ case C13: /* we found LEFT_SQUARE_BRACKET */
263
+ _strbuf_copy = _strbuf;
264
+ _strbuf.clear();
265
+ _current_state = C1;
266
+ return LEFT_SQUARE_BRACKET;
267
+
268
+ case C14: /* we found RIGHT_SQUARE_BRACKET */
269
+ _strbuf_copy = _strbuf;
270
+ _strbuf.clear();
271
+ _current_state = C1;
272
+ return RIGHT_SQUARE_BRACKET;
273
+
274
+ case C15: /* we found SLASH */
275
+ _strbuf_copy = _strbuf;
276
+ _strbuf.clear();
277
+ _current_state = C1;
278
+ return SLASH;
279
+
280
+ case C16: /* we found MULT */
281
+ _strbuf_copy = _strbuf;
282
+ _strbuf.clear();
283
+ _current_state = C1;
284
+ return MULT;
285
+
286
+ default :
287
+ break;
288
+ }
289
+
290
+ }
291
+
292
+ }while(_read_chars_num <= _pconf._read_buffer_size);
293
+
294
+ return TERM;
295
+ }
296
+
297
+ }
@@ -0,0 +1,99 @@
1
+ /*!
2
+ Abstract:
3
+ this file is a part of bbcode library
4
+
5
+ Author:
6
+ Igor Franchuk (sprog@online.ru)
7
+
8
+ Last Update:
9
+ $Id: bbcode_lexer.h,v 1.1 2007/12/19 19:13:30 lanthruster Exp $
10
+ Version: 0.01
11
+ */
12
+
13
+ #ifndef BBCODE_LEXER_H
14
+ #define BBCODE_LEXER_H
15
+ #pragma once
16
+
17
+ #include <string>
18
+ #include <iostream>
19
+ #include <istream>
20
+ #include <stdexcept>
21
+
22
+ #include "bbcode_config.h"
23
+ #include "bbcode_utils.h"
24
+
25
+ #pragma warning(disable: 4290) //VS doesn't support exception specifications
26
+
27
+ namespace{ // NOTE(review): unnamed namespace inside a header - every file that includes this header can use the names below unqualified; changing this may break such files, so check before touching it
28
+
29
+ using std::string;
30
+ using std::istream;
31
+ using std::cin;
32
+ using std::out_of_range;
33
+ using std::length_error;
34
+ using std::streamsize;
35
+
36
+ }
37
+
38
+ namespace bbcode{
39
+
40
+ //symbols
41
+ // T is TERMINATOR, NT = NOT TERMINATOR
42
+ // CHAR = anything but (COLON, SEMICOLON, QUOTE, ENTER, CR_RETURN, SPACE, EQUALITY)
43
+ // enum symbols{CHAR, COLON = ':', SEMICOLON = ';', QUOTE = '"', ENTER = '\r', SPACEBAR = ' ', CR_RETURN = '\n'};
44
+
45
+ //State-machine states. Enumerators without an initialiser count up from C1 == 0, so C2 == 1, C3 == 2 and so on.
46
+ enum conds{C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11, C12, C13, C14, C15, C16, C17, C18, C19,
47
+ C20, C21, C22, C23, C24, C25, C26, C27, C28, C29, C30,
48
+ C31, C32, C33, C34, C35, C36, C37, C38, C39, C40, C41,
49
+ C42, C43, C44, C45, C46, C47, C48, C49, C50, C51, C52,
50
+ C53, C54, C55, C56, C57, C58, C59, C60, C61, C62, C63,
51
+ C64, C65, C66, C67, C68, C69, C70, C71, C72, C73, C74,
52
+ C75, C76, C77, C78, C79, C80, C81, C82, C83, C84, C85,
53
+ C86, C87, C88, C89, C90, C91, C92, C93, C94, C95, C96,
54
+ C97, C98, C99,C100,C101,C102,C103,C104,C105,C106,C107,
55
+ C108,C109,C110,C111,C112,C113,C114,C115,C116,C117,C118,
56
+ C119,C120,C121,C122,C123,C124,C125,C126,C127,C129,C130, // NOTE(review): C128 is absent from this list - confirm whether that is intentional
57
+ END, T=1, NT=0, PB=2, YES=1, NO=0}; // flag values kept in the same enum as the states: T/NT = terminator or not, YES/NO = yes or no; numerically T == YES == C2, NT == NO == C1 and PB == C3 (PB presumably "push back" - confirm)
58
+
59
+
60
+ class lexer{
61
+ public:
62
+ enum flag{ON=1, OFF=0};
63
+ private:
64
+ parser_config _pconf;
65
+ char* _buffer;
66
+ size_t _tot_characters_read;
67
+ conds _current_state;
68
+ string _strbuf;
69
+ string _strbuf_copy;
70
+
71
+ string _upper_str;
72
+
73
+ size_t _i;
74
+ size_t _read_chars_num;
75
+
76
+ public:
77
+
78
+
79
+
80
+ /* class initializer */
81
+ void init(const parser_config& pconf);
82
+
83
+ lexeme get_lexeme(void);
84
+ string get_lexeme_str(void);
85
+
86
+ /* constructor section */
87
+ lexer() :
88
+ _tot_characters_read(0),
89
+ _buffer(NULL)
90
+ {}
91
+
92
+ ~lexer() { if(_buffer != NULL){ delete[] _buffer; _buffer = NULL; } }
93
+
94
+ };
95
+
96
+
97
+ }
98
+
99
+ #endif