ios_parser 0.5.1-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +37 -0
- data/.rubocop.yml +39 -0
- data/.travis.yml +16 -0
- data/CHANGELOG.md +30 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +9 -0
- data/Guardfile +15 -0
- data/LICENSE.txt +675 -0
- data/README.md +90 -0
- data/Rakefile +20 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/doc/state_machine.graphviz +41 -0
- data/doc/state_machine.png +0 -0
- data/ext/ios_parser/c_lexer/extconf.rb +4 -0
- data/ext/ios_parser/c_lexer/lexer.c +507 -0
- data/fixtures/complex_banner.txt +24 -0
- data/ios_parser.gemspec +25 -0
- data/lib/ios_parser/ios/command.rb +91 -0
- data/lib/ios_parser/ios/document.rb +54 -0
- data/lib/ios_parser/ios/queryable.rb +219 -0
- data/lib/ios_parser/ios.rb +73 -0
- data/lib/ios_parser/lexer.rb +327 -0
- data/lib/ios_parser/pure.rb +2 -0
- data/lib/ios_parser/version.rb +7 -0
- data/lib/ios_parser.rb +37 -0
- data/spec/lib/ios_parser/ios/queryable_spec.rb +157 -0
- data/spec/lib/ios_parser/ios_spec.rb +337 -0
- data/spec/lib/ios_parser/lexer_spec.rb +290 -0
- data/spec/lib/ios_parser_spec.rb +96 -0
- data/spec/spec_helper.rb +19 -0
- metadata +121 -0
data/README.md
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
ios_parser
|
2
|
+
==========
|
3
|
+
|
4
|
+
[![Gem Version](https://badge.fury.io/rb/ios_parser.svg)](https://rubygems.org/gems/ios_parser)
|
5
|
+
[![Build Status](https://travis-ci.org/bjmllr/ios_parser.svg)](https://travis-ci.org/bjmllr/ios_parser)
|
6
|
+
|
7
|
+
convert switch and router config files to structured data
|
8
|
+
|
9
|
+
Basic Parsing
|
10
|
+
-------------
|
11
|
+
require 'ios_parser'
|
12
|
+
text = my_method_to_get_a_raw_config
|
13
|
+
config = IOSParser.parse(text)
|
14
|
+
|
15
|
+
JSON Serialization and Deserialization
|
16
|
+
--------------------------------------
|
17
|
+
my_http_client.put_json(config.to_json)
|
18
|
+
config = IOSParser.from_json(my_http_client.get_json)
|
19
|
+
|
20
|
+
Query for a single element (the first to match)
|
21
|
+
-----------------------------------------------
|
22
|
+
config.find('hostname').to_hash
|
23
|
+
# => { :args => ["hostname", "myswitch"], :commands => [] }
|
24
|
+
|
25
|
+
`case`-style Queries
|
26
|
+
--------------------
|
27
|
+
config.find_all(starts_with: ['interface', /Gigabit/])
|
28
|
+
# => [{:args=>["interface", "GigabitEthernet0/1"],
|
29
|
+
# :commands=>[{:args=>["switchport", "mode", "trunk"], :commands=>[]},
|
30
|
+
# {:args=>["logging", "event", "trunk-status"], :commands=>[]},
|
31
|
+
# {:args=>["speed", 1000], :commands=>[]}]},
|
32
|
+
# {:args=>["interface", "GigabitEthernet0/2"],
|
33
|
+
# :commands=>[{:args=>["switchport", "mode", "trunk"], :commands=>[]},
|
34
|
+
# {:args=>["logging", "event", "trunk-status"], :commands=>[]},
|
35
|
+
# {:args=>["speed", 1000], :commands=>[]}]}]
|
36
|
+
|
37
|
+
Chained Queries
|
38
|
+
---------------
|
39
|
+
config.find(starts_with: ['interface', 'GigabitEthernet0/1']).find('speed').args[1]
|
40
|
+
# => 1000
|
41
|
+
|
42
|
+
Nesting Queries
|
43
|
+
---------------
|
44
|
+
`#find_all` returns an `Array`, so you can't chain `IOSParser` queries after it. Instead, you can use nested queries with Ruby's `Array` and `Enumerable` APIs. This is useful to transform and clean data.
|
45
|
+
|
46
|
+
config.find_all("interface").flat_map do |i|
|
47
|
+
s = i.find("speed")
|
48
|
+
s ? [{ interface: i.args.last, speed: s.args.last }] : []
|
49
|
+
end
|
50
|
+
# => [{:interface=>"GigabitEthernet0/1", :speed=>1000},
|
51
|
+
# {:interface=>"GigabitEthernet0/2", :speed=>1000}]
|
52
|
+
|
53
|
+
Compound Query Matchers
|
54
|
+
-----------------------
|
55
|
+
Compound matchers combine or modify the meaning of other matchers. Their argument can be a single hash if all of the affected matchers have different names, or an array of hashes if it is necessary to use the same matcher name with multiple arguments.
|
56
|
+
|
57
|
+
Available Compound Query Matchers
|
58
|
+
---------------------------------
|
59
|
+
* `parent` - matches commands by their parents (e.g., `parent: { starts_with: 'interface' }` will match the first level of subcommands of any interface section)
|
60
|
+
* `any_child` - matches commands that have at least one child command matching the query (e.g., `any_child: { name: 'speed' }` will match any command that has a child command starting with `speed`)
|
61
|
+
* `no_child` - matches commands that have no child command matching the query (e.g., `no_child: { name: 'speed' }` will match commands that do not have a child command starting with `speed`)
|
62
|
+
* `any` - matches commands that match any of an array of queries (e.g., `any: [{ starts_with: 'interface' }, { starts_with: 'ip route' }]` will match all interfaces and all IOS-style static routes)
|
63
|
+
* `all` - matches commands that match all of an array of queries (e.g., `all: { starts_with: 'interface', line: /FastEthernet/ }` will match all FastEthernet interfaces)
|
64
|
+
* `none` - negation of `any`
|
65
|
+
* `not_all` / `not` - negation of `all`
|
66
|
+
|
67
|
+
Available Base Query Matchers
|
68
|
+
-----------------------------
|
69
|
+
* `name` - matches the first argument of a command (e.g., `name: 'ip'` will match `ip route` or `ip http server`)
|
70
|
+
* `starts_with` - matches the leading arguments of a command
|
71
|
+
* `contains` - matches any sequence of arguments of a command
|
72
|
+
* `ends_with` - matches the trailing arguments of a command
|
73
|
+
* `line` - matches the string form of a command (all the arguments separated by single spaces)
|
74
|
+
* `depth` - matches based on how many command sections contain the command (e.g., `depth: 0` will only match top-level commands), accepts integers and integer ranges
|
75
|
+
|
76
|
+
## Development
|
77
|
+
|
78
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
79
|
+
|
80
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
81
|
+
|
82
|
+
## Contributing
|
83
|
+
|
84
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/bjmllr/ios_parser. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
85
|
+
|
86
|
+
## Copyright and License
|
87
|
+
|
88
|
+
Copyright (C) 2016 Ben Miller
|
89
|
+
|
90
|
+
The gem is available as free software under the terms of the [GNU General Public License, Version 3](http://www.gnu.org/licenses/gpl-3.0.html).
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
|
3
|
+
require 'rspec/core/rake_task'
|
4
|
+
|
5
|
+
RSpec::Core::RakeTask.new(:spec)
|
6
|
+
|
7
|
+
require 'rake/extensiontask'
|
8
|
+
spec = Gem::Specification.load('ios_parser.gemspec')
|
9
|
+
Rake::ExtensionTask.new do |ext|
|
10
|
+
ext.name = 'c_lexer'
|
11
|
+
ext.ext_dir = 'ext/ios_parser/c_lexer'
|
12
|
+
ext.lib_dir = 'lib/ios_parser'
|
13
|
+
ext.gem_spec = spec
|
14
|
+
end
|
15
|
+
|
16
|
+
if RUBY_ENGINE == 'jruby'
|
17
|
+
task default: :spec
|
18
|
+
else
|
19
|
+
task default: [:compile, :spec]
|
20
|
+
end
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'ios_parser'
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require 'irb'
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Generate png with
|
2
|
+
# dot state_machine.graphviz -Tpng > state_machine.png
|
3
|
+
|
4
|
+
digraph g{
|
5
|
+
rankdir="LR";
|
6
|
+
node [shape = circle];
|
7
|
+
# from root
|
8
|
+
LEX_STATE_ROOT -> LEX_STATE_BANNER;
|
9
|
+
LEX_STATE_ROOT -> LEX_STATE_CERTIFICATE;
|
10
|
+
LEX_STATE_ROOT -> LEX_STATE_COMMENT;
|
11
|
+
LEX_STATE_ROOT -> LEX_STATE_INTEGER;
|
12
|
+
LEX_STATE_ROOT -> LEX_STATE_WORD;
|
13
|
+
LEX_STATE_ROOT -> LEX_STATE_INDENT;
|
14
|
+
# from certificate
|
15
|
+
LEX_STATE_CERTIFICATE -> LEX_STATE_INDENT;
|
16
|
+
LEX_STATE_CERTIFICATE -> LEX_STATE_ROOT;
|
17
|
+
# from indent
|
18
|
+
LEX_STATE_INDENT -> LEX_STATE_ROOT;
|
19
|
+
LEX_STATE_INDENT -> LEX_STATE_BANNER;
|
20
|
+
LEX_STATE_INDENT -> LEX_STATE_CERTIFICATE;
|
21
|
+
LEX_STATE_INDENT -> LEX_STATE_COMMENT;
|
22
|
+
LEX_STATE_INDENT -> LEX_STATE_INTEGER;
|
23
|
+
LEX_STATE_INDENT -> LEX_STATE_WORD;
|
24
|
+
# from comment
|
25
|
+
LEX_STATE_COMMENT -> LEX_STATE_ROOT;
|
26
|
+
# from integer
|
27
|
+
LEX_STATE_INTEGER -> LEX_STATE_DECIMAL;
|
28
|
+
LEX_STATE_INTEGER -> LEX_STATE_ROOT;
|
29
|
+
LEX_STATE_INTEGER -> LEX_STATE_INDENT;
|
30
|
+
LEX_STATE_INTEGER -> LEX_STATE_WORD;
|
31
|
+
# from decimal
|
32
|
+
LEX_STATE_DECIMAL -> LEX_STATE_WORD;
|
33
|
+
LEX_STATE_DECIMAL -> LEX_STATE_ROOT;
|
34
|
+
LEX_STATE_DECIMAL -> LEX_STATE_INDENT;
|
35
|
+
# from word
|
36
|
+
LEX_STATE_WORD -> LEX_STATE_ROOT;
|
37
|
+
LEX_STATE_WORD -> LEX_STATE_INDENT;
|
38
|
+
# from banner
|
39
|
+
LEX_STATE_BANNER -> LEX_STATE_ROOT;
|
40
|
+
|
41
|
+
}
|
Binary file
|
@@ -0,0 +1,507 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
|
3
|
+
static VALUE rb_mIOSParser;
|
4
|
+
static VALUE rb_cCLexer;
|
5
|
+
VALUE rb_eLexError;
|
6
|
+
|
7
|
+
typedef enum lex_token_state {
|
8
|
+
LEX_STATE_ROOT,
|
9
|
+
LEX_STATE_INTEGER,
|
10
|
+
LEX_STATE_DECIMAL,
|
11
|
+
LEX_STATE_QUOTED_STRING,
|
12
|
+
LEX_STATE_WORD,
|
13
|
+
LEX_STATE_COMMENT,
|
14
|
+
LEX_STATE_BANNER,
|
15
|
+
LEX_STATE_CERTIFICATE,
|
16
|
+
LEX_STATE_INDENT,
|
17
|
+
} lex_token_state;
|
18
|
+
|
19
|
+
struct LexInfo {
|
20
|
+
char *text;
|
21
|
+
size_t pos;
|
22
|
+
size_t token_start;
|
23
|
+
size_t token_length;
|
24
|
+
lex_token_state token_state;
|
25
|
+
VALUE tokens;
|
26
|
+
int indent;
|
27
|
+
int indent_pos;
|
28
|
+
int indents[100];
|
29
|
+
char banner_delimiter;
|
30
|
+
char string_terminator;
|
31
|
+
};
|
32
|
+
typedef struct LexInfo LexInfo;
|
33
|
+
|
34
|
+
#define IS_SPACE(C) ((C) == ' ' || (C) == '\t' || (C) == '\r') /* horizontal whitespace; fully parenthesized so it composes with ! and && */
|
35
|
+
#define IS_NEWLINE(C) ((C) == '\n') /* line feed only; parenthesized so negation applies to the whole test */
|
36
|
+
#define IS_COMMENT(C) ((C) == '!') /* comment marker; parenthesized so negation applies to the whole test */
|
37
|
+
#define IS_DIGIT(C) ('0' <= (C) && (C) <= '9') /* ASCII decimal digit; parenthesized so it composes with ! and || */
|
38
|
+
#define IS_DOT(C) ((C) == '.') /* decimal point; parenthesized so negation applies to the whole test */
|
39
|
+
#define IS_DECIMAL(C) (IS_DIGIT(C) || IS_DOT(C)) /* digit or decimal point; outer parentheses keep !IS_DECIMAL(c) correct */
|
40
|
+
#define IS_LETTER(C) (('a' <= (C) && (C) <= 'z') || ('A' <= (C) && (C) <= 'Z')) /* ASCII letter; explicit grouping of && inside || */
|
41
|
+
#define IS_PUNCT(C) strchr("-+$:/,()|*#=<>!\"\\&@;%~{}'\"?[]_^`", C)
|
42
|
+
#define IS_WORD(C) (IS_DECIMAL(C) || IS_LETTER(C) || IS_PUNCT(C)) /* any character allowed inside a word token; outer parentheses keep !IS_WORD(c) correct */
|
43
|
+
#define IS_LEAD_ZERO(C) ((C) == '0') /* a leading zero keeps a token from being lexed as an integer; parenthesized so !IS_LEAD_ZERO(c) is safe */
|
44
|
+
#define IS_QUOTE(C) ((C) == '"' || (C) == '\'') /* single or double quote; parenthesized so it composes with ! and && */
|
45
|
+
#define IS_LEAD_COMMENT(C) ((C) == '#' || (C) == '!') /* characters that start a comment at the beginning of a line; parenthesized so it composes with ! and && */
|
46
|
+
|
47
|
+
#define CURRENT_CHAR(LEX) ((LEX)->text[(LEX)->pos]) /* character at the lexer's current position (still an lvalue, so &CURRENT_CHAR(lex) works) */
|
48
|
+
#define TOKEN_EMPTY(LEX) ((LEX)->token_length == 0) /* token_length is a size_t, so "<= 0" could only ever mean "== 0" */
|
49
|
+
|
50
|
+
#define MAKE_TOKEN(LEX, TOK) rb_ary_new3(2, rb_int_new((LEX)->token_start), (TOK)) /* builds the two-element Ruby array [token_start, TOK] */
|
51
|
+
#define ADD_TOKEN(LEX, TOK) rb_ary_push((LEX)->tokens, MAKE_TOKEN(LEX, TOK)) /* appends [token_start, TOK] to the lexer's token array */
|
52
|
+
|
53
|
+
#define CMD_LEN(CMD) (sizeof(CMD) - 1)
|
54
|
+
int is_certificate(LexInfo *lex) {
|
55
|
+
VALUE indent_ary, indent, command_ary, command;
|
56
|
+
int token_count, indent_pos, command_pos;
|
57
|
+
|
58
|
+
token_count = RARRAY_LEN(lex->tokens);
|
59
|
+
indent_pos = token_count - 6;
|
60
|
+
if (indent_pos < 0) { return 0; }
|
61
|
+
|
62
|
+
command_pos = token_count - 5;
|
63
|
+
if (command_pos < 0) { return 0; }
|
64
|
+
|
65
|
+
indent_ary = rb_ary_entry(lex->tokens, indent_pos);
|
66
|
+
indent = rb_ary_entry(indent_ary, 1);
|
67
|
+
if (TYPE(indent) != T_SYMBOL) { return 0; }
|
68
|
+
if (rb_intern("INDENT") != SYM2ID(indent)) { return 0; }
|
69
|
+
|
70
|
+
command_ary = rb_ary_entry(lex->tokens, command_pos);
|
71
|
+
if (TYPE(command_ary) != T_ARRAY) { return 0; }
|
72
|
+
if (RARRAY_LEN(command_ary) < 2) { return 0; }
|
73
|
+
|
74
|
+
command = rb_ary_entry(command_ary, 1);
|
75
|
+
if (TYPE(command) != T_STRING) { return 0; }
|
76
|
+
|
77
|
+
StringValue(command);
|
78
|
+
if (RSTRING_LEN(command) != CMD_LEN("certificate")) { return 0; }
|
79
|
+
if (0 != strncmp(RSTRING_PTR(command), "certificate", 11)) { return 0; }
|
80
|
+
|
81
|
+
return 1;
|
82
|
+
}
|
83
|
+
|
84
|
+
int is_banner_begin(LexInfo *lex) {
|
85
|
+
VALUE banner_ary, banner;
|
86
|
+
int token_count = RARRAY_LEN(lex->tokens);
|
87
|
+
int banner_pos = token_count - 2;
|
88
|
+
|
89
|
+
if (banner_pos < 0) { return 0; }
|
90
|
+
|
91
|
+
banner_ary = rb_ary_entry(lex->tokens, banner_pos);
|
92
|
+
banner = rb_ary_entry(banner_ary, 1);
|
93
|
+
if (TYPE(banner) != T_STRING) { return 0; }
|
94
|
+
|
95
|
+
StringValue(banner);
|
96
|
+
if (RSTRING_LEN(banner) != CMD_LEN("banner")) { return 0; }
|
97
|
+
if (0 != strncmp(RSTRING_PTR(banner), "banner", 6)) { return 0; }
|
98
|
+
|
99
|
+
return 1;
|
100
|
+
}
|
101
|
+
|
102
|
+
/*
 * Commit the token currently being accumulated, if any.
 *
 * The pending text is lex->text[token_start .. token_start + token_length).
 * Depending on lex->token_state it is converted to a Ruby String
 * (quoted string, word, banner, certificate), Integer or Float, and
 * [token_start, value] is appended to lex->tokens. Comments are dropped.
 * In every non-raising path the state is reset to LEX_STATE_ROOT.
 *
 * The numeric conversions go through Ruby strings instead of a
 * stack-allocated, token-sized buffer plus atoi()/atof(): that avoids an
 * unbounded VLA for long tokens and lets long digit runs become big
 * Integers instead of overflowing an int.
 *
 * Raises RuntimeError if called in a state that cannot produce a token.
 */
static void delimit(LexInfo *lex) {
    const char *start;
    VALUE text;
    VALUE token;

    if (TOKEN_EMPTY(lex)) {
        lex->token_state = LEX_STATE_ROOT;
        return;
    }

    start = &lex->text[lex->token_start];

    switch (lex->token_state) {
    case (LEX_STATE_QUOTED_STRING):
    case (LEX_STATE_WORD):
    case (LEX_STATE_BANNER):
    case (LEX_STATE_CERTIFICATE):
        token = rb_str_new(start, lex->token_length);
        break;

    case (LEX_STATE_INTEGER):
        text = rb_str_new(start, lex->token_length);
        token = rb_str_to_inum(text, 10, 0);
        break;

    case (LEX_STATE_DECIMAL):
        text = rb_str_new(start, lex->token_length);
        token = rb_float_new(rb_str_to_dbl(text, 0));
        break;

    case (LEX_STATE_COMMENT):
        /* comments produce no token; token_length is left untouched */
        lex->token_state = LEX_STATE_ROOT;
        return;

    default:
        /* report the actual pending text, bounded by its length */
        rb_raise(rb_eRuntimeError,
                 "Unable to commit token %.*s at %d",
                 (int)lex->token_length, start, (int)lex->pos);
        return;
    }

    ADD_TOKEN(lex, token);
    lex->token_state = LEX_STATE_ROOT;
    lex->token_length = 0;
}
|
146
|
+
|
147
|
+
static void deallocate(void * lex) {
|
148
|
+
xfree(lex);
|
149
|
+
}
|
150
|
+
|
151
|
+
static void mark(void *ptr) {
|
152
|
+
LexInfo *lex = (LexInfo *)ptr;
|
153
|
+
rb_gc_mark(lex->tokens);
|
154
|
+
}
|
155
|
+
|
156
|
+
/*
 * Allocation function for IOSParser::CLexer.
 *
 * ALLOC() returns uninitialized memory, and mark() hands lex->tokens to
 * rb_gc_mark(). A GC run can happen before initialize has assigned the
 * token array (for instance while initialize is allocating it), so the
 * field is set to Qnil here to keep mark() from seeing garbage.
 */
static VALUE allocate(VALUE klass) {
    LexInfo *lex = ALLOC(LexInfo);

    lex->text = NULL;
    lex->tokens = Qnil;

    return Data_Wrap_Struct(klass, mark, deallocate, lex);
}
|
160
|
+
|
161
|
+
/*
 * CLexer#initialize: reset every field of the wrapped LexInfo to its
 * starting value and create an empty token array.
 *
 * The method is registered with arity 0, so Ruby calls it with `self`
 * only; the C signature takes exactly that one argument to match.
 * Returns self.
 */
static VALUE initialize(VALUE self) {
    LexInfo *lex;
    Data_Get_Struct(self, LexInfo, lex);

    lex->text = NULL;
    lex->pos = 0;
    lex->token_start = 0;
    lex->token_length = 0;
    lex->token_state = LEX_STATE_ROOT;
    lex->tokens = rb_ary_new();

    lex->indent = 0;
    lex->indent_pos = 0;
    lex->indents[0] = 0;

    /* give the delimiter fields a defined value until a banner or
       quoted string sets them */
    lex->banner_delimiter = '\0';
    lex->string_terminator = '\0';

    return self;
}
|
178
|
+
|
179
|
+
static void process_root(LexInfo * lex);
|
180
|
+
static void process_start_of_line(LexInfo * lex);
|
181
|
+
static void start_banner(LexInfo * lex);
|
182
|
+
|
183
|
+
static void process_newline(LexInfo *lex) {
|
184
|
+
delimit(lex);
|
185
|
+
|
186
|
+
if (is_banner_begin(lex)) {
|
187
|
+
lex->token_state = LEX_STATE_BANNER;
|
188
|
+
start_banner(lex);
|
189
|
+
lex->pos = lex->pos + 1;
|
190
|
+
lex->token_start = lex->pos;
|
191
|
+
lex->token_length = 0;
|
192
|
+
return;
|
193
|
+
}
|
194
|
+
|
195
|
+
lex->token_start = lex->pos;
|
196
|
+
ADD_TOKEN(lex, ID2SYM(rb_intern("EOL")));
|
197
|
+
lex->token_state = LEX_STATE_INDENT;
|
198
|
+
lex->indent = 0;
|
199
|
+
}
|
200
|
+
|
201
|
+
static void process_space(LexInfo *lex) {
|
202
|
+
delimit(lex);
|
203
|
+
}
|
204
|
+
|
205
|
+
static void process_comment(LexInfo *lex) {
|
206
|
+
char c = CURRENT_CHAR(lex);
|
207
|
+
|
208
|
+
if (IS_NEWLINE(c)) {
|
209
|
+
delimit(lex);
|
210
|
+
lex->token_state = LEX_STATE_INDENT;
|
211
|
+
lex->indent = 0;
|
212
|
+
}
|
213
|
+
}
|
214
|
+
|
215
|
+
static void process_quoted_string(LexInfo *lex) {
|
216
|
+
char c = CURRENT_CHAR(lex);
|
217
|
+
|
218
|
+
lex->token_length++;
|
219
|
+
if (!lex->string_terminator) {
|
220
|
+
lex->string_terminator = c;
|
221
|
+
} else if (c == lex->string_terminator) {
|
222
|
+
delimit(lex);
|
223
|
+
}
|
224
|
+
}
|
225
|
+
|
226
|
+
static void process_word(LexInfo *lex) {
|
227
|
+
char c = CURRENT_CHAR(lex);
|
228
|
+
|
229
|
+
if (IS_WORD(c)) {
|
230
|
+
lex->token_length++;
|
231
|
+
} else if (IS_SPACE(c)) {
|
232
|
+
process_space(lex);
|
233
|
+
} else if (IS_NEWLINE(c)) {
|
234
|
+
process_newline(lex);
|
235
|
+
}
|
236
|
+
}
|
237
|
+
|
238
|
+
static void process_decimal(LexInfo *lex) {
|
239
|
+
char c = CURRENT_CHAR(lex);
|
240
|
+
|
241
|
+
if (IS_DIGIT(c)) {
|
242
|
+
lex->token_length++;
|
243
|
+
} else if (IS_WORD(c)) {
|
244
|
+
lex->token_length++;
|
245
|
+
lex->token_state = LEX_STATE_WORD;
|
246
|
+
} else if (IS_SPACE(c)) {
|
247
|
+
process_space(lex);
|
248
|
+
} else if (IS_NEWLINE(c)) {
|
249
|
+
process_newline(lex);
|
250
|
+
}
|
251
|
+
}
|
252
|
+
|
253
|
+
static void process_integer(LexInfo *lex) {
|
254
|
+
char c = CURRENT_CHAR(lex);
|
255
|
+
|
256
|
+
if (IS_DIGIT(c)) {
|
257
|
+
lex->token_length++;
|
258
|
+
} else if (c == '.') {
|
259
|
+
lex->token_length++;
|
260
|
+
lex->token_state = LEX_STATE_DECIMAL;
|
261
|
+
} else if (IS_SPACE(c)) {
|
262
|
+
process_space(lex);
|
263
|
+
} else if (IS_NEWLINE(c)) {
|
264
|
+
process_newline(lex);
|
265
|
+
} else if (IS_WORD(c)) {
|
266
|
+
process_word(lex);
|
267
|
+
lex->token_state = LEX_STATE_WORD;
|
268
|
+
}
|
269
|
+
}
|
270
|
+
|
271
|
+
static void process_certificate(LexInfo *lex) {
|
272
|
+
char quit[5];
|
273
|
+
|
274
|
+
strncpy(quit, &CURRENT_CHAR(lex) - 5, 5);
|
275
|
+
|
276
|
+
if (0 == strncmp("quit\n", quit, 5)) {
|
277
|
+
int length = lex->token_length;
|
278
|
+
VALUE token;
|
279
|
+
|
280
|
+
length = length - 5;
|
281
|
+
while(' ' == lex->text[lex->token_start + length - 1]) {
|
282
|
+
length--;
|
283
|
+
}
|
284
|
+
lex->token_length = length;
|
285
|
+
|
286
|
+
token = rb_str_new(&lex->text[lex->token_start], lex->token_length);
|
287
|
+
|
288
|
+
rb_funcall(token, rb_intern("gsub!"), 2,
|
289
|
+
rb_str_new2("\n"), rb_str_new2(""));
|
290
|
+
|
291
|
+
rb_funcall(token, rb_intern("gsub!"), 2,
|
292
|
+
rb_str_new2(" "), rb_str_new2(" "));
|
293
|
+
|
294
|
+
ADD_TOKEN(lex, token);
|
295
|
+
lex->token_length = 0;
|
296
|
+
|
297
|
+
lex->token_start = lex->pos;
|
298
|
+
ADD_TOKEN(lex, ID2SYM(rb_intern("CERTIFICATE_END")));
|
299
|
+
|
300
|
+
process_newline(lex);
|
301
|
+
process_start_of_line(lex);
|
302
|
+
} else {
|
303
|
+
lex->token_length++;
|
304
|
+
}
|
305
|
+
}
|
306
|
+
|
307
|
+
static void start_certificate(LexInfo *lex) {
|
308
|
+
lex->indent_pos--;
|
309
|
+
rb_ary_pop(lex->tokens);
|
310
|
+
rb_ary_pop(lex->tokens);
|
311
|
+
ADD_TOKEN(lex, ID2SYM(rb_intern("CERTIFICATE_BEGIN")));
|
312
|
+
process_certificate(lex);
|
313
|
+
}
|
314
|
+
|
315
|
+
int is_banner_end_char(LexInfo *lex) {
|
316
|
+
return CURRENT_CHAR(lex) == lex->banner_delimiter &&
|
317
|
+
(0 < lex->pos && '\n' == lex->text[lex->pos - 1] ||
|
318
|
+
'\n' == lex->text[lex->pos + 1]);
|
319
|
+
}
|
320
|
+
|
321
|
+
int is_banner_end_string(LexInfo *lex) {
|
322
|
+
/* only accepts the banner-ending string "EOF" */
|
323
|
+
return (CURRENT_CHAR(lex) == 'F' &&
|
324
|
+
lex->text[lex->pos - 1] == 'O' &&
|
325
|
+
lex->text[lex->pos - 2] == 'E' &&
|
326
|
+
lex->text[lex->pos - 3] == '\n');
|
327
|
+
}
|
328
|
+
|
329
|
+
static void process_banner(LexInfo *lex) {
|
330
|
+
if (lex->banner_delimiter && is_banner_end_char(lex)) {
|
331
|
+
lex->token_length++;
|
332
|
+
delimit(lex);
|
333
|
+
lex->token_start = lex->pos;
|
334
|
+
ADD_TOKEN(lex, ID2SYM(rb_intern("BANNER_END")));
|
335
|
+
if (lex->text[lex->pos + 1] == 'C') { lex->pos++; }
|
336
|
+
} else if (!lex->banner_delimiter && is_banner_end_string(lex)) {
|
337
|
+
lex->token_length -= 1;
|
338
|
+
delimit(lex);
|
339
|
+
lex->token_start = lex->pos;
|
340
|
+
ADD_TOKEN(lex, ID2SYM(rb_intern("BANNER_END")));
|
341
|
+
} else {
|
342
|
+
lex->token_length++;
|
343
|
+
}
|
344
|
+
}
|
345
|
+
|
346
|
+
static void start_banner(LexInfo *lex) {
|
347
|
+
char c = CURRENT_CHAR(lex);
|
348
|
+
lex->banner_delimiter = (c == '\n') ? 0 : c;
|
349
|
+
ADD_TOKEN(lex, ID2SYM(rb_intern("BANNER_BEGIN")));
|
350
|
+
if ('\n' == lex->text[lex->pos + 2]) lex->pos++;
|
351
|
+
}
|
352
|
+
|
353
|
+
/*
 * Handle one character while in LEX_STATE_INDENT (start of a line).
 *
 * Whitespace characters are counted into lex->indent. The first
 * non-whitespace character decides the block structure: an indent deeper
 * than the level on top of lex->indents pushes a new level and emits
 * INDENT; otherwise DEDENT is emitted for every level being closed.
 * The character is then handed to the comment state (for '#' or '!') or
 * to process_root().
 *
 * Raises IOSParser::LexError when a new level would not fit in the
 * fixed-size lex->indents stack, instead of writing past its end.
 */
static void process_start_of_line(LexInfo *lex) {
    char c = CURRENT_CHAR(lex);
    const int max_levels = (int)(sizeof(lex->indents) / sizeof(lex->indents[0]));

    if (IS_SPACE(c)) {
        lex->indent++;
        return;
    }

    if (lex->indent > lex->indents[lex->indent_pos]) {
        /* check before touching any state so a raise leaves lex consistent */
        if (lex->indent_pos + 1 >= max_levels) {
            rb_raise(rb_eLexError,
                     "Indentation nested too deeply at %d",
                     (int)lex->pos);
        }
        lex->token_start = lex->pos;
        ADD_TOKEN(lex, ID2SYM(rb_intern("INDENT")));
        lex->indent_pos++;
        lex->indents[lex->indent_pos] = lex->indent;
    } else {
        while (lex->indent_pos >= 1 &&
               lex->indent <= lex->indents[lex->indent_pos-1]) {
            ADD_TOKEN(lex, ID2SYM(rb_intern("DEDENT")));
            lex->indent_pos--;
        }
    }

    if (IS_LEAD_COMMENT(c)) {
        lex->token_state = LEX_STATE_COMMENT;
    } else {
        process_root(lex);
    }
}
|
380
|
+
|
381
|
+
static void process_root(LexInfo *lex) {
|
382
|
+
char c;
|
383
|
+
c = CURRENT_CHAR(lex);
|
384
|
+
lex->token_start = lex->pos;
|
385
|
+
|
386
|
+
if (IS_SPACE(c)) {
|
387
|
+
delimit(lex);
|
388
|
+
|
389
|
+
} else if (is_banner_begin(lex)) {
|
390
|
+
lex->token_state = LEX_STATE_BANNER;
|
391
|
+
start_banner(lex);
|
392
|
+
lex->pos = lex->pos + 2;
|
393
|
+
lex->token_start = lex->pos;
|
394
|
+
lex->token_length = 0;
|
395
|
+
|
396
|
+
} else if (is_certificate(lex)) {
|
397
|
+
lex->token_state = LEX_STATE_CERTIFICATE;
|
398
|
+
start_certificate(lex);
|
399
|
+
|
400
|
+
} else if (IS_NEWLINE(c)) {
|
401
|
+
process_newline(lex);
|
402
|
+
|
403
|
+
} else if (IS_COMMENT(c)) {
|
404
|
+
lex->token_state = LEX_STATE_COMMENT;
|
405
|
+
process_comment(lex);
|
406
|
+
|
407
|
+
} else if (!(IS_LEAD_ZERO(c)) && IS_DIGIT(c)) {
|
408
|
+
lex->token_state = LEX_STATE_INTEGER;
|
409
|
+
process_integer(lex);
|
410
|
+
|
411
|
+
} else if (IS_QUOTE(c)) {
|
412
|
+
lex->token_state = LEX_STATE_QUOTED_STRING;
|
413
|
+
lex->string_terminator = '\0';
|
414
|
+
process_quoted_string(lex);
|
415
|
+
|
416
|
+
} else if (IS_WORD(c)) {
|
417
|
+
lex->token_state = LEX_STATE_WORD;
|
418
|
+
process_word(lex);
|
419
|
+
|
420
|
+
} else {
|
421
|
+
rb_raise(rb_eTypeError,
|
422
|
+
"Attempted to lex unknown character %c at %d",
|
423
|
+
c, (int)lex->pos);
|
424
|
+
}
|
425
|
+
}
|
426
|
+
|
427
|
+
static VALUE call(VALUE self, VALUE input_text) {
|
428
|
+
LexInfo *lex;
|
429
|
+
size_t input_len;
|
430
|
+
|
431
|
+
if (TYPE(input_text) != T_STRING) {
|
432
|
+
rb_raise(rb_eTypeError, "The argument to CLexer#call must be a String.");
|
433
|
+
return Qnil;
|
434
|
+
}
|
435
|
+
|
436
|
+
Data_Get_Struct(self, LexInfo, lex);
|
437
|
+
|
438
|
+
StringValue(input_text);
|
439
|
+
lex->text = RSTRING_PTR(input_text);
|
440
|
+
input_len = RSTRING_LEN(input_text);
|
441
|
+
|
442
|
+
for (lex->pos = 0; lex->pos < input_len; lex->pos++) {
|
443
|
+
switch(lex->token_state) {
|
444
|
+
case (LEX_STATE_ROOT):
|
445
|
+
process_root(lex);
|
446
|
+
break;
|
447
|
+
|
448
|
+
case (LEX_STATE_INDENT):
|
449
|
+
process_start_of_line(lex);
|
450
|
+
break;
|
451
|
+
|
452
|
+
case (LEX_STATE_INTEGER):
|
453
|
+
process_integer(lex);
|
454
|
+
break;
|
455
|
+
|
456
|
+
case (LEX_STATE_DECIMAL):
|
457
|
+
process_decimal(lex);
|
458
|
+
break;
|
459
|
+
|
460
|
+
case (LEX_STATE_QUOTED_STRING):
|
461
|
+
process_quoted_string(lex);
|
462
|
+
break;
|
463
|
+
|
464
|
+
case (LEX_STATE_WORD):
|
465
|
+
process_word(lex);
|
466
|
+
break;
|
467
|
+
|
468
|
+
case (LEX_STATE_COMMENT):
|
469
|
+
process_comment(lex);
|
470
|
+
break;
|
471
|
+
|
472
|
+
case (LEX_STATE_BANNER):
|
473
|
+
process_banner(lex);
|
474
|
+
break;
|
475
|
+
|
476
|
+
case (LEX_STATE_CERTIFICATE):
|
477
|
+
process_certificate(lex);
|
478
|
+
break;
|
479
|
+
}
|
480
|
+
}
|
481
|
+
|
482
|
+
if (lex->token_state == LEX_STATE_QUOTED_STRING) {
|
483
|
+
rb_raise(rb_eLexError,
|
484
|
+
"Unterminated quoted string starting at %d: %.*s",
|
485
|
+
(int)lex->token_start,
|
486
|
+
(int)lex->token_length, &lex->text[lex->token_start]);
|
487
|
+
}
|
488
|
+
|
489
|
+
delimit(lex);
|
490
|
+
lex->token_start = lex->pos;
|
491
|
+
|
492
|
+
for (; lex->indent_pos > 0; lex->indent_pos--) {
|
493
|
+
ADD_TOKEN(lex, ID2SYM(rb_intern("DEDENT")));
|
494
|
+
}
|
495
|
+
|
496
|
+
return lex->tokens;
|
497
|
+
}
|
498
|
+
|
499
|
+
void Init_c_lexer() {
|
500
|
+
rb_mIOSParser = rb_define_module("IOSParser");
|
501
|
+
rb_cCLexer = rb_define_class_under(rb_mIOSParser, "CLexer", rb_cObject);
|
502
|
+
rb_eLexError = rb_define_class_under(rb_mIOSParser, "LexError",
|
503
|
+
rb_eStandardError);
|
504
|
+
rb_define_alloc_func(rb_cCLexer, allocate);
|
505
|
+
rb_define_method(rb_cCLexer, "initialize", initialize, 0);
|
506
|
+
rb_define_method(rb_cCLexer, "call", call, 1);
|
507
|
+
}
|