ios_parser 0.5.1-java

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,90 @@
1
+ ios_parser
2
+ ==========
3
+
4
+ [![Gem Version](https://badge.fury.io/rb/ios_parser.svg)](https://rubygems.org/gems/ios_parser)
5
+ [![Build Status](https://travis-ci.org/bjmllr/ios_parser.svg)](https://travis-ci.org/bjmllr/ios_parser)
6
+
7
+ convert switch and router config files to structured data
8
+
9
+ Basic Parsing
10
+ -------------
11
+ require 'ios_parser'
12
+ text = my_method_to_get_a_raw_config
13
+ config = IOSParser.parse(text)
14
+
15
+ JSON Serialization and Deserialization
16
+ --------------------------------------
17
+ my_http_client.put_json(config.to_json)
18
+ config = IOSParser.from_json(my_http_client.get_json)
19
+
20
+ Query for a single element (the first to match)
21
+ -----------------------------------------------
22
+ config.find('hostname').to_hash
23
+ # => { :args => ["hostname", "myswitch"], :commands => [] }
24
+
25
+ `case`-style Queries
26
+ --------------------
27
+ config.find_all(starts_with: ['interface', /Gigabit/])
28
+ # => [{:args=>["interface", "GigabitEthernet0/1"],
29
+ # :commands=>[{:args=>["switchport", "mode", "trunk"], :commands=>[]},
30
+ # {:args=>["logging", "event", "trunk-status"], :commands=>[]},
31
+ # {:args=>["speed", 1000], :commands=>[]}]},
32
+ # {:args=>["interface", "GigabitEthernet0/2"],
33
+ # :commands=>[{:args=>["switchport", "mode", "trunk"], :commands=>[]},
34
+ # {:args=>["logging", "event", "trunk-status"], :commands=>[]},
35
+ # {:args=>["speed", 1000], :commands=>[]}]}]
36
+
37
+ Chained Queries
38
+ ---------------
39
+ config.find(starts_with: ['interface', 'GigabitEthernet0/1']).find('speed').args[1]
40
+ # => 1000
41
+
42
+ Nesting Queries
43
+ ---------------
44
+ `#find_all` returns an `Array`, so you can't chain `IOSParser` queries after it. Instead, you can use nested queries with Ruby's `Array` and `Enumerable` APIs. This is useful to transform and clean data.
45
+
46
+ config.find_all("interface").flat_map do |i|
47
+ s = i.find("speed")
48
+ s ? [{ interface: i.args.last, speed: s.args.last }] : []
49
+ end
50
+ # => [{:interface=>"GigabitEthernet0/1", :speed=>1000},
51
+ # {:interface=>"GigabitEthernet0/2", :speed=>1000}]
52
+
53
+ Compound Query Matchers
54
+ -----------------------
55
+ Compound matchers combine or modify the meaning of other matchers. Their argument can be a single hash if all of the affected matchers have different names, and an array of hashes if it is necessary to use the same matcher name with multiple arguments.
56
+
57
+ Available Compound Query Matchers
58
+ ---------------------------------
59
+ * `parent` - matches commands by their parents (e.g., `parent: { starts_with: 'interface' }` will match the first level of subcommands of any interface section)
60
+ * `any_child` - matches commands that have at least one child command matching the given query (e.g., `any_child: { name: 'speed' }` will match any command that has a child command starting with `speed`)
61
+ * `no_child` - matches commands that have no child command matching the given query (e.g., `no_child: { name: 'speed' }` will match commands that do not have a child command starting with `speed`)
62
+ * `any` - matches commands that match any of an array of queries (e.g., `any: [{ starts_with: 'interface' }, { starts_with: 'ip route' }]` will match all interfaces and all IOS-style static routes)
63
+ * `all` - matches commands that match all of an array of queries (e.g., `all: { starts_with: 'interface', line: /FastEthernet/ }` will match all FastEthernet interfaces)
64
+ * `none` - negation of `any`
65
+ * `not_all` / `not` - negation of `all`
66
+
67
+ Available Base Query Matchers
68
+ -----------------------------
69
+ * `name` - matches the first argument of a command (e.g., `name: 'ip'` will match `ip route` or `ip http server`)
70
+ * `starts_with` - matches the leading arguments of a command
71
+ * `contains` - matches any sequence of arguments of a command
72
+ * `ends_with` - matches the trailing arguments of a command
73
+ * `line` - matches the string form of a command (all the arguments separated by single spaces)
74
+ * `depth` - matches based on how many command sections contain the command (e.g., `depth: 0` will only match top-level commands), accepts integers and integer ranges
75
+
76
+ ## Development
77
+
78
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
79
+
80
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
81
+
82
+ ## Contributing
83
+
84
+ Bug reports and pull requests are welcome on GitHub at https://github.com/bjmllr/ios_parser. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
85
+
86
+ ## Copyright and License
87
+
88
+ Copyright (C) 2016 Ben Miller
89
+
90
+ The gem is available as free software under the terms of the [GNU General Public License, Version 3](http://www.gnu.org/licenses/gpl-3.0.html).
data/Rakefile ADDED
@@ -0,0 +1,20 @@
1
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'
require 'rake/extensiontask'

# `rake spec` runs the RSpec suite.
RSpec::Core::RakeTask.new(:spec)

# `rake compile` builds the C lexer extension into lib/ios_parser.
gemspec = Gem::Specification.load('ios_parser.gemspec')
Rake::ExtensionTask.new do |extension|
  extension.name = 'c_lexer'
  extension.ext_dir = 'ext/ios_parser/c_lexer'
  extension.lib_dir = 'lib/ios_parser'
  extension.gem_spec = gemspec
end

# On JRuby the default task only runs the specs; the compile step is skipped.
default_tasks = RUBY_ENGINE == 'jruby' ? [:spec] : [:compile, :spec]
task default: default_tasks
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'ios_parser'
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require 'irb'
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,41 @@
1
+ # Generate png with
2
+ # dot state_machine.graphviz -Tpng > state_machine.png
3
+
4
+ digraph g{
5
+ rankdir="LR";
6
+ node [shape = circle];
7
+ # from root
8
+ LEX_STATE_ROOT -> LEX_STATE_BANNER;
9
+ LEX_STATE_ROOT -> LEX_STATE_CERTIFICATE;
10
+ LEX_STATE_ROOT -> LEX_STATE_COMMENT;
11
+ LEX_STATE_ROOT -> LEX_STATE_INTEGER;
12
+ LEX_STATE_ROOT -> LEX_STATE_WORD;
13
+ LEX_STATE_ROOT -> LEX_STATE_INDENT;
14
+ # from certificate
15
+ LEX_STATE_CERTIFICATE -> LEX_STATE_INDENT;
16
+ LEX_STATE_CERTIFICATE -> LEX_STATE_ROOT;
17
+ # from indent
18
+ LEX_STATE_INDENT -> LEX_STATE_ROOT;
19
+ LEX_STATE_INDENT -> LEX_STATE_BANNER;
20
+ LEX_STATE_INDENT -> LEX_STATE_CERTIFICATE;
21
+ LEX_STATE_INDENT -> LEX_STATE_COMMENT;
22
+ LEX_STATE_INDENT -> LEX_STATE_INTEGER;
23
+ LEX_STATE_INDENT -> LEX_STATE_WORD;
24
+ # from comment
25
+ LEX_STATE_COMMENT -> LEX_STATE_ROOT;
26
+ # from integer
27
+ LEX_STATE_INTEGER -> LEX_STATE_DECIMAL;
28
+ LEX_STATE_INTEGER -> LEX_STATE_ROOT;
29
+ LEX_STATE_INTEGER -> LEX_STATE_INDENT;
30
+ LEX_STATE_INTEGER -> LEX_STATE_WORD;
31
+ # from decimal
32
+ LEX_STATE_DECIMAL -> LEX_STATE_WORD;
33
+ LEX_STATE_DECIMAL -> LEX_STATE_ROOT;
34
+ LEX_STATE_DECIMAL -> LEX_STATE_INDENT;
35
+ # from word
36
+ LEX_STATE_WORD -> LEX_STATE_ROOT;
37
+ LEX_STATE_WORD -> LEX_STATE_INDENT;
38
+ # from banner
39
+ LEX_STATE_BANNER -> LEX_STATE_ROOT;
40
+
41
+ }
Binary file
@@ -0,0 +1,4 @@
1
require 'mkmf'

# Generates the Makefile that builds the ios_parser/c_lexer extension.
target = 'ios_parser/c_lexer'
dir_config(target)
create_makefile(target)
@@ -0,0 +1,507 @@
1
+ #include <ruby.h>
2
+
3
+ static VALUE rb_mIOSParser;
4
+ static VALUE rb_cCLexer;
5
+ VALUE rb_eLexError;
6
+
7
/* States of the lexer; `call` dispatches on the current state per character. */
typedef enum lex_token_state {
    LEX_STATE_ROOT,          /* between tokens; next character picks the token kind */
    LEX_STATE_INTEGER,       /* accumulating a run of digits */
    LEX_STATE_DECIMAL,       /* accumulating digits after a '.' was seen */
    LEX_STATE_QUOTED_STRING, /* inside a quoted string, up to the matching quote */
    LEX_STATE_WORD,          /* accumulating a generic word */
    LEX_STATE_COMMENT,       /* skipping characters until the next newline */
    LEX_STATE_BANNER,        /* accumulating banner text until its end marker */
    LEX_STATE_CERTIFICATE,   /* accumulating certificate text until "quit\n" */
    LEX_STATE_INDENT         /* at the start of a line, counting leading whitespace */
} lex_token_state;
18
+
19
+ struct LexInfo {
20
+ char *text;
21
+ size_t pos;
22
+ size_t token_start;
23
+ size_t token_length;
24
+ lex_token_state token_state;
25
+ VALUE tokens;
26
+ int indent;
27
+ int indent_pos;
28
+ int indents[100];
29
+ char banner_delimiter;
30
+ char string_terminator;
31
+ };
32
+ typedef struct LexInfo LexInfo;
33
+
34
/*
 * Character classification helpers.
 *
 * Every macro body and every use of its argument is parenthesized so the
 * macros compose safely inside larger expressions such as `!IS_SPACE(c)` or
 * `IS_DIGIT(c) && other`. Arguments may be evaluated more than once, so they
 * must be free of side effects.
 */
#define IS_SPACE(C) ((C) == ' ' || (C) == '\t' || (C) == '\r')
#define IS_NEWLINE(C) ((C) == '\n')
#define IS_COMMENT(C) ((C) == '!')
#define IS_DIGIT(C) ('0' <= (C) && (C) <= '9')
#define IS_DOT(C) ((C) == '.')
#define IS_DECIMAL(C) (IS_DIGIT(C) || IS_DOT(C))
#define IS_LETTER(C) \
    (('a' <= (C) && (C) <= 'z') || ('A' <= (C) && (C) <= 'Z'))
/* Note: strchr also matches the terminating NUL, so IS_PUNCT('\0') is true. */
#define IS_PUNCT(C) \
    (strchr("-+$:/,()|*#=<>!\"\\&@;%~{}'\"?[]_^`", (C)) != NULL)
#define IS_WORD(C) (IS_DECIMAL(C) || IS_LETTER(C) || IS_PUNCT(C))
#define IS_LEAD_ZERO(C) ((C) == '0')
#define IS_QUOTE(C) ((C) == '"' || (C) == '\'')
#define IS_LEAD_COMMENT(C) ((C) == '#' || (C) == '!')

/* Character at the lexer's current position (usable as an lvalue). */
#define CURRENT_CHAR(LEX) ((LEX)->text[(LEX)->pos])
/* True when no characters have been accumulated for the current token. */
#define TOKEN_EMPTY(LEX) ((LEX)->token_length <= 0)

/* Builds the two-element Ruby Array [token_start, TOK]. */
#define MAKE_TOKEN(LEX, TOK) \
    rb_ary_new3(2, rb_int_new((LEX)->token_start), (TOK))
/* Appends [token_start, TOK] to the lexer's token list. */
#define ADD_TOKEN(LEX, TOK) rb_ary_push((LEX)->tokens, MAKE_TOKEN(LEX, TOK))

/* Length of a string literal (or char array) without its terminating NUL. */
#define CMD_LEN(CMD) (sizeof(CMD) - 1)
54
+ int is_certificate(LexInfo *lex) {
55
+ VALUE indent_ary, indent, command_ary, command;
56
+ int token_count, indent_pos, command_pos;
57
+
58
+ token_count = RARRAY_LEN(lex->tokens);
59
+ indent_pos = token_count - 6;
60
+ if (indent_pos < 0) { return 0; }
61
+
62
+ command_pos = token_count - 5;
63
+ if (command_pos < 0) { return 0; }
64
+
65
+ indent_ary = rb_ary_entry(lex->tokens, indent_pos);
66
+ indent = rb_ary_entry(indent_ary, 1);
67
+ if (TYPE(indent) != T_SYMBOL) { return 0; }
68
+ if (rb_intern("INDENT") != SYM2ID(indent)) { return 0; }
69
+
70
+ command_ary = rb_ary_entry(lex->tokens, command_pos);
71
+ if (TYPE(command_ary) != T_ARRAY) { return 0; }
72
+ if (RARRAY_LEN(command_ary) < 2) { return 0; }
73
+
74
+ command = rb_ary_entry(command_ary, 1);
75
+ if (TYPE(command) != T_STRING) { return 0; }
76
+
77
+ StringValue(command);
78
+ if (RSTRING_LEN(command) != CMD_LEN("certificate")) { return 0; }
79
+ if (0 != strncmp(RSTRING_PTR(command), "certificate", 11)) { return 0; }
80
+
81
+ return 1;
82
+ }
83
+
84
+ int is_banner_begin(LexInfo *lex) {
85
+ VALUE banner_ary, banner;
86
+ int token_count = RARRAY_LEN(lex->tokens);
87
+ int banner_pos = token_count - 2;
88
+
89
+ if (banner_pos < 0) { return 0; }
90
+
91
+ banner_ary = rb_ary_entry(lex->tokens, banner_pos);
92
+ banner = rb_ary_entry(banner_ary, 1);
93
+ if (TYPE(banner) != T_STRING) { return 0; }
94
+
95
+ StringValue(banner);
96
+ if (RSTRING_LEN(banner) != CMD_LEN("banner")) { return 0; }
97
+ if (0 != strncmp(RSTRING_PTR(banner), "banner", 6)) { return 0; }
98
+
99
+ return 1;
100
+ }
101
+
102
+ static void delimit(LexInfo *lex) {
103
+ VALUE token;
104
+ char string[lex->token_length + 1];
105
+
106
+ if (TOKEN_EMPTY(lex)) {
107
+ lex->token_state = LEX_STATE_ROOT;
108
+ return;
109
+ }
110
+
111
+ switch (lex->token_state) {
112
+ case (LEX_STATE_QUOTED_STRING):
113
+ case (LEX_STATE_WORD):
114
+ case (LEX_STATE_BANNER):
115
+ case (LEX_STATE_CERTIFICATE):
116
+ token = rb_str_new(&lex->text[lex->token_start], lex->token_length);
117
+ break;
118
+
119
+ case (LEX_STATE_INTEGER):
120
+ strncpy(string, &lex->text[lex->token_start], lex->token_length);
121
+ string[lex->token_length] = '\0';
122
+ token = rb_int_new(atoi(string));
123
+ break;
124
+
125
+ case (LEX_STATE_DECIMAL):
126
+ strncpy(string, &lex->text[lex->token_start], lex->token_length);
127
+ string[lex->token_length] = '\0';
128
+ token = rb_float_new(atof(string));
129
+ break;
130
+
131
+ case (LEX_STATE_COMMENT):
132
+ lex->token_state = LEX_STATE_ROOT;
133
+ return;
134
+
135
+ default:
136
+ rb_raise(rb_eRuntimeError,
137
+ "Unable to commit token %s at %d",
138
+ string, (int)lex->pos);
139
+ return;
140
+ }
141
+
142
+ ADD_TOKEN(lex, token);
143
+ lex->token_state = LEX_STATE_ROOT;
144
+ lex->token_length = 0;
145
+ }
146
+
147
/* Free callback for the wrapped LexInfo: releases the struct itself. */
static void deallocate(void *ptr) {
    xfree(ptr);
}
150
+
151
+ static void mark(void *ptr) {
152
+ LexInfo *lex = (LexInfo *)ptr;
153
+ rb_gc_mark(lex->tokens);
154
+ }
155
+
156
+ static VALUE allocate(VALUE klass) {
157
+ LexInfo * lex = ALLOC(LexInfo);
158
+ return Data_Wrap_Struct(klass, mark, deallocate, lex);
159
+ }
160
+
161
+ static VALUE initialize(VALUE self, VALUE input_text) {
162
+ LexInfo *lex;
163
+ Data_Get_Struct(self, LexInfo, lex);
164
+
165
+ lex->text = NULL;
166
+ lex->pos = 0;
167
+ lex->token_start = 0;
168
+ lex->token_length = 0;
169
+ lex->token_state = LEX_STATE_ROOT;
170
+ lex->tokens = rb_ary_new();
171
+
172
+ lex->indent = 0;
173
+ lex->indent_pos = 0;
174
+ lex->indents[0] = 0;
175
+
176
+ return self;
177
+ }
178
+
179
+ static void process_root(LexInfo * lex);
180
+ static void process_start_of_line(LexInfo * lex);
181
+ static void start_banner(LexInfo * lex);
182
+
183
+ static void process_newline(LexInfo *lex) {
184
+ delimit(lex);
185
+
186
+ if (is_banner_begin(lex)) {
187
+ lex->token_state = LEX_STATE_BANNER;
188
+ start_banner(lex);
189
+ lex->pos = lex->pos + 1;
190
+ lex->token_start = lex->pos;
191
+ lex->token_length = 0;
192
+ return;
193
+ }
194
+
195
+ lex->token_start = lex->pos;
196
+ ADD_TOKEN(lex, ID2SYM(rb_intern("EOL")));
197
+ lex->token_state = LEX_STATE_INDENT;
198
+ lex->indent = 0;
199
+ }
200
+
201
+ static void process_space(LexInfo *lex) {
202
+ delimit(lex);
203
+ }
204
+
205
+ static void process_comment(LexInfo *lex) {
206
+ char c = CURRENT_CHAR(lex);
207
+
208
+ if (IS_NEWLINE(c)) {
209
+ delimit(lex);
210
+ lex->token_state = LEX_STATE_INDENT;
211
+ lex->indent = 0;
212
+ }
213
+ }
214
+
215
+ static void process_quoted_string(LexInfo *lex) {
216
+ char c = CURRENT_CHAR(lex);
217
+
218
+ lex->token_length++;
219
+ if (!lex->string_terminator) {
220
+ lex->string_terminator = c;
221
+ } else if (c == lex->string_terminator) {
222
+ delimit(lex);
223
+ }
224
+ }
225
+
226
+ static void process_word(LexInfo *lex) {
227
+ char c = CURRENT_CHAR(lex);
228
+
229
+ if (IS_WORD(c)) {
230
+ lex->token_length++;
231
+ } else if (IS_SPACE(c)) {
232
+ process_space(lex);
233
+ } else if (IS_NEWLINE(c)) {
234
+ process_newline(lex);
235
+ }
236
+ }
237
+
238
+ static void process_decimal(LexInfo *lex) {
239
+ char c = CURRENT_CHAR(lex);
240
+
241
+ if (IS_DIGIT(c)) {
242
+ lex->token_length++;
243
+ } else if (IS_WORD(c)) {
244
+ lex->token_length++;
245
+ lex->token_state = LEX_STATE_WORD;
246
+ } else if (IS_SPACE(c)) {
247
+ process_space(lex);
248
+ } else if (IS_NEWLINE(c)) {
249
+ process_newline(lex);
250
+ }
251
+ }
252
+
253
+ static void process_integer(LexInfo *lex) {
254
+ char c = CURRENT_CHAR(lex);
255
+
256
+ if (IS_DIGIT(c)) {
257
+ lex->token_length++;
258
+ } else if (c == '.') {
259
+ lex->token_length++;
260
+ lex->token_state = LEX_STATE_DECIMAL;
261
+ } else if (IS_SPACE(c)) {
262
+ process_space(lex);
263
+ } else if (IS_NEWLINE(c)) {
264
+ process_newline(lex);
265
+ } else if (IS_WORD(c)) {
266
+ process_word(lex);
267
+ lex->token_state = LEX_STATE_WORD;
268
+ }
269
+ }
270
+
271
+ static void process_certificate(LexInfo *lex) {
272
+ char quit[5];
273
+
274
+ strncpy(quit, &CURRENT_CHAR(lex) - 5, 5);
275
+
276
+ if (0 == strncmp("quit\n", quit, 5)) {
277
+ int length = lex->token_length;
278
+ VALUE token;
279
+
280
+ length = length - 5;
281
+ while(' ' == lex->text[lex->token_start + length - 1]) {
282
+ length--;
283
+ }
284
+ lex->token_length = length;
285
+
286
+ token = rb_str_new(&lex->text[lex->token_start], lex->token_length);
287
+
288
+ rb_funcall(token, rb_intern("gsub!"), 2,
289
+ rb_str_new2("\n"), rb_str_new2(""));
290
+
291
+ rb_funcall(token, rb_intern("gsub!"), 2,
292
+ rb_str_new2(" "), rb_str_new2(" "));
293
+
294
+ ADD_TOKEN(lex, token);
295
+ lex->token_length = 0;
296
+
297
+ lex->token_start = lex->pos;
298
+ ADD_TOKEN(lex, ID2SYM(rb_intern("CERTIFICATE_END")));
299
+
300
+ process_newline(lex);
301
+ process_start_of_line(lex);
302
+ } else {
303
+ lex->token_length++;
304
+ }
305
+ }
306
+
307
+ static void start_certificate(LexInfo *lex) {
308
+ lex->indent_pos--;
309
+ rb_ary_pop(lex->tokens);
310
+ rb_ary_pop(lex->tokens);
311
+ ADD_TOKEN(lex, ID2SYM(rb_intern("CERTIFICATE_BEGIN")));
312
+ process_certificate(lex);
313
+ }
314
+
315
+ int is_banner_end_char(LexInfo *lex) {
316
+ return CURRENT_CHAR(lex) == lex->banner_delimiter &&
317
+ (0 < lex->pos && '\n' == lex->text[lex->pos - 1] ||
318
+ '\n' == lex->text[lex->pos + 1]);
319
+ }
320
+
321
+ int is_banner_end_string(LexInfo *lex) {
322
+ /* onlys accept the banner-ending string "EOF" */
323
+ return (CURRENT_CHAR(lex) == 'F' &&
324
+ lex->text[lex->pos - 1] == 'O' &&
325
+ lex->text[lex->pos - 2] == 'E' &&
326
+ lex->text[lex->pos - 3] == '\n');
327
+ }
328
+
329
+ static void process_banner(LexInfo *lex) {
330
+ if (lex->banner_delimiter && is_banner_end_char(lex)) {
331
+ lex->token_length++;
332
+ delimit(lex);
333
+ lex->token_start = lex->pos;
334
+ ADD_TOKEN(lex, ID2SYM(rb_intern("BANNER_END")));
335
+ if (lex->text[lex->pos + 1] == 'C') { lex->pos++; }
336
+ } else if (!lex->banner_delimiter && is_banner_end_string(lex)) {
337
+ lex->token_length -= 1;
338
+ delimit(lex);
339
+ lex->token_start = lex->pos;
340
+ ADD_TOKEN(lex, ID2SYM(rb_intern("BANNER_END")));
341
+ } else {
342
+ lex->token_length++;
343
+ }
344
+ }
345
+
346
+ static void start_banner(LexInfo *lex) {
347
+ char c = CURRENT_CHAR(lex);
348
+ lex->banner_delimiter = (c == '\n') ? 0 : c;
349
+ ADD_TOKEN(lex, ID2SYM(rb_intern("BANNER_BEGIN")));
350
+ if ('\n' == lex->text[lex->pos + 2]) lex->pos++;
351
+ }
352
+
353
+ static void process_start_of_line(LexInfo *lex) {
354
+ char c = CURRENT_CHAR(lex);
355
+
356
+ if (IS_SPACE(c)) {
357
+ lex->indent++;
358
+ return;
359
+ }
360
+
361
+ if (lex->indent > lex->indents[lex->indent_pos]) {
362
+ lex->token_start = lex->pos;
363
+ ADD_TOKEN(lex, ID2SYM(rb_intern("INDENT")));
364
+ lex->indent_pos++;
365
+ lex->indents[lex->indent_pos] = lex->indent;
366
+ } else {
367
+ while (lex->indent_pos >= 1 &&
368
+ lex->indent <= lex->indents[lex->indent_pos-1]) {
369
+ ADD_TOKEN(lex, ID2SYM(rb_intern("DEDENT")));
370
+ lex->indent_pos--;
371
+ }
372
+ }
373
+
374
+ if (IS_LEAD_COMMENT(c)) {
375
+ lex->token_state = LEX_STATE_COMMENT;
376
+ } else {
377
+ process_root(lex);
378
+ }
379
+ }
380
+
381
+ static void process_root(LexInfo *lex) {
382
+ char c;
383
+ c = CURRENT_CHAR(lex);
384
+ lex->token_start = lex->pos;
385
+
386
+ if (IS_SPACE(c)) {
387
+ delimit(lex);
388
+
389
+ } else if (is_banner_begin(lex)) {
390
+ lex->token_state = LEX_STATE_BANNER;
391
+ start_banner(lex);
392
+ lex->pos = lex->pos + 2;
393
+ lex->token_start = lex->pos;
394
+ lex->token_length = 0;
395
+
396
+ } else if (is_certificate(lex)) {
397
+ lex->token_state = LEX_STATE_CERTIFICATE;
398
+ start_certificate(lex);
399
+
400
+ } else if (IS_NEWLINE(c)) {
401
+ process_newline(lex);
402
+
403
+ } else if (IS_COMMENT(c)) {
404
+ lex->token_state = LEX_STATE_COMMENT;
405
+ process_comment(lex);
406
+
407
+ } else if (!(IS_LEAD_ZERO(c)) && IS_DIGIT(c)) {
408
+ lex->token_state = LEX_STATE_INTEGER;
409
+ process_integer(lex);
410
+
411
+ } else if (IS_QUOTE(c)) {
412
+ lex->token_state = LEX_STATE_QUOTED_STRING;
413
+ lex->string_terminator = '\0';
414
+ process_quoted_string(lex);
415
+
416
+ } else if (IS_WORD(c)) {
417
+ lex->token_state = LEX_STATE_WORD;
418
+ process_word(lex);
419
+
420
+ } else {
421
+ rb_raise(rb_eTypeError,
422
+ "Attempted to lex unknown character %c at %d",
423
+ c, (int)lex->pos);
424
+ }
425
+ }
426
+
427
+ static VALUE call(VALUE self, VALUE input_text) {
428
+ LexInfo *lex;
429
+ size_t input_len;
430
+
431
+ if (TYPE(input_text) != T_STRING) {
432
+ rb_raise(rb_eTypeError, "The argument to CLexer#call must be a String.");
433
+ return Qnil;
434
+ }
435
+
436
+ Data_Get_Struct(self, LexInfo, lex);
437
+
438
+ StringValue(input_text);
439
+ lex->text = RSTRING_PTR(input_text);
440
+ input_len = RSTRING_LEN(input_text);
441
+
442
+ for (lex->pos = 0; lex->pos < input_len; lex->pos++) {
443
+ switch(lex->token_state) {
444
+ case (LEX_STATE_ROOT):
445
+ process_root(lex);
446
+ break;
447
+
448
+ case (LEX_STATE_INDENT):
449
+ process_start_of_line(lex);
450
+ break;
451
+
452
+ case (LEX_STATE_INTEGER):
453
+ process_integer(lex);
454
+ break;
455
+
456
+ case (LEX_STATE_DECIMAL):
457
+ process_decimal(lex);
458
+ break;
459
+
460
+ case (LEX_STATE_QUOTED_STRING):
461
+ process_quoted_string(lex);
462
+ break;
463
+
464
+ case (LEX_STATE_WORD):
465
+ process_word(lex);
466
+ break;
467
+
468
+ case (LEX_STATE_COMMENT):
469
+ process_comment(lex);
470
+ break;
471
+
472
+ case (LEX_STATE_BANNER):
473
+ process_banner(lex);
474
+ break;
475
+
476
+ case (LEX_STATE_CERTIFICATE):
477
+ process_certificate(lex);
478
+ break;
479
+ }
480
+ }
481
+
482
+ if (lex->token_state == LEX_STATE_QUOTED_STRING) {
483
+ rb_raise(rb_eLexError,
484
+ "Unterminated quoted string starting at %d: %.*s",
485
+ (int)lex->token_start,
486
+ (int)lex->token_length, &lex->text[lex->token_start]);
487
+ }
488
+
489
+ delimit(lex);
490
+ lex->token_start = lex->pos;
491
+
492
+ for (; lex->indent_pos > 0; lex->indent_pos--) {
493
+ ADD_TOKEN(lex, ID2SYM(rb_intern("DEDENT")));
494
+ }
495
+
496
+ return lex->tokens;
497
+ }
498
+
499
+ void Init_c_lexer() {
500
+ rb_mIOSParser = rb_define_module("IOSParser");
501
+ rb_cCLexer = rb_define_class_under(rb_mIOSParser, "CLexer", rb_cObject);
502
+ rb_eLexError = rb_define_class_under(rb_mIOSParser, "LexError",
503
+ rb_eStandardError);
504
+ rb_define_alloc_func(rb_cCLexer, allocate);
505
+ rb_define_method(rb_cCLexer, "initialize", initialize, 0);
506
+ rb_define_method(rb_cCLexer, "call", call, 1);
507
+ }