ios_parser 0.5.1-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +37 -0
- data/.rubocop.yml +39 -0
- data/.travis.yml +16 -0
- data/CHANGELOG.md +30 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +9 -0
- data/Guardfile +15 -0
- data/LICENSE.txt +675 -0
- data/README.md +90 -0
- data/Rakefile +20 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/doc/state_machine.graphviz +41 -0
- data/doc/state_machine.png +0 -0
- data/ext/ios_parser/c_lexer/extconf.rb +4 -0
- data/ext/ios_parser/c_lexer/lexer.c +507 -0
- data/fixtures/complex_banner.txt +24 -0
- data/ios_parser.gemspec +25 -0
- data/lib/ios_parser/ios/command.rb +91 -0
- data/lib/ios_parser/ios/document.rb +54 -0
- data/lib/ios_parser/ios/queryable.rb +219 -0
- data/lib/ios_parser/ios.rb +73 -0
- data/lib/ios_parser/lexer.rb +327 -0
- data/lib/ios_parser/pure.rb +2 -0
- data/lib/ios_parser/version.rb +7 -0
- data/lib/ios_parser.rb +37 -0
- data/spec/lib/ios_parser/ios/queryable_spec.rb +157 -0
- data/spec/lib/ios_parser/ios_spec.rb +337 -0
- data/spec/lib/ios_parser/lexer_spec.rb +290 -0
- data/spec/lib/ios_parser_spec.rb +96 -0
- data/spec/spec_helper.rb +19 -0
- metadata +121 -0
data/README.md
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
ios_parser
|
2
|
+
==========
|
3
|
+
|
4
|
+
[ios_parser on RubyGems](https://rubygems.org/gems/ios_parser)
|
5
|
+
[Build status on Travis CI](https://travis-ci.org/bjmllr/ios_parser)
|
6
|
+
|
7
|
+
convert switch and router config files to structured data
|
8
|
+
|
9
|
+
Basic Parsing
|
10
|
+
-------------
|
11
|
+
require 'ios_parser'
|
12
|
+
text = my_method_to_get_a_raw_config
|
13
|
+
config = IOSParser.parse(text)
|
14
|
+
|
15
|
+
JSON Serialization and Deserialization
|
16
|
+
--------------------------------------
|
17
|
+
my_http_client.put_json(config.to_json)
|
18
|
+
config = IOSParser.from_json(my_http_client.get_json)
|
19
|
+
|
20
|
+
Query for a single element (the first to match)
|
21
|
+
-----------------------------------------------
|
22
|
+
config.find('hostname').to_hash
|
23
|
+
# => { :args => ["hostname", "myswitch"], :commands => [] }
|
24
|
+
|
25
|
+
`case`-style Queries
|
26
|
+
--------------------
|
27
|
+
config.find_all(starts_with: ['interface', /Gigabit/])
|
28
|
+
# => [{:args=>["interface", "GigabitEthernet0/1"],
|
29
|
+
# :commands=>[{:args=>["switchport", "mode", "trunk"], :commands=>[]},
|
30
|
+
# {:args=>["logging", "event", "trunk-status"], :commands=>[]},
|
31
|
+
# {:args=>["speed", 1000], :commands=>[]}]},
|
32
|
+
# {:args=>["interface", "GigabitEthernet0/2"],
|
33
|
+
# :commands=>[{:args=>["switchport", "mode", "trunk"], :commands=>[]},
|
34
|
+
# {:args=>["logging", "event", "trunk-status"], :commands=>[]},
|
35
|
+
# {:args=>["speed", 1000], :commands=>[]}]}]
|
36
|
+
|
37
|
+
Chained Queries
|
38
|
+
---------------
|
39
|
+
config.find(starts_with: ['interface', 'GigabitEthernet0/1']).find('speed').args[1]
|
40
|
+
# => 1000
|
41
|
+
|
42
|
+
Nesting Queries
|
43
|
+
---------------
|
44
|
+
`#find_all` returns an `Array`, so you can't chain `IOSParser` queries after it. Instead, you can use nested queries with Ruby's `Array` and `Enumerable` APIs. This is useful to transform and clean data.
|
45
|
+
|
46
|
+
config.find_all("interface").flat_map do |i|
|
47
|
+
s = i.find("speed")
|
48
|
+
s ? [{ interface: i.args.last, speed: s.args.last }] : []
|
49
|
+
end
|
50
|
+
# => [{:interface=>"GigabitEthernet0/1", :speed=>1000},
|
51
|
+
# {:interface=>"GigabitEthernet0/2", :speed=>1000}]
|
52
|
+
|
53
|
+
Compound Query Matchers
|
54
|
+
-----------------------
|
55
|
+
Compound matchers combine or modify the meaning of other matchers. Their argument can be a single hash if all of the affected matchers have different names, and an array of hashes if it is necessary to use the same matcher name with multiple arguments.
|
56
|
+
|
57
|
+
Available Compound Query Matchers
|
58
|
+
---------------------------------
|
59
|
+
* `parent` - matches commands by their parents (e.g., `parent: { starts_with: 'interface' }` will match the first level of subcommands of any interface section)
|
60
|
+
* `any_child` - matches commands that match at least one child command (e.g., `any_child: { name: 'speed' }` will match any command that has a child command starting with `speed`)
|
61
|
+
* `no_child` - matches commands that do not match any child command (e.g., `no_child: { name: 'speed' }` will match commands that do not have a child command starting with `speed`)
|
62
|
+
* `any` - matches commands that match any of an array of queries (e.g., `any: [{ starts_with: 'interface' }, { starts_with: 'ip route' }]` will match all interfaces and all IOS-style static routes)
|
63
|
+
* `all` - matches commands that match all of an array of queries (e.g., `all: { starts_with: 'interface', line: /FastEthernet/ }` will match all FastEthernet interfaces)
|
64
|
+
* `none` - negation of `any`
|
65
|
+
* `not_all` / `not` - negation of `all`
|
66
|
+
|
67
|
+
Available Base Query Matchers
|
68
|
+
-----------------------------
|
69
|
+
* `name` - matches the first argument of a command (e.g., `name: 'ip'` will match `ip route` or `ip http server`)
|
70
|
+
* `starts_with` - matches the leading arguments of a command
|
71
|
+
* `contains` - matches any sequence of arguments of a command
|
72
|
+
* `ends_with` - matches the trailing arguments of a command
|
73
|
+
* `line` - matches the string form of a command (all the arguments separated by single spaces)
|
74
|
+
* `depth` - matches based on how many command sections contain the command (e.g., `depth: 0` will only match top-level commands), accepts integers and integer ranges
|
75
|
+
|
76
|
+
## Development
|
77
|
+
|
78
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
79
|
+
|
80
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
81
|
+
|
82
|
+
## Contributing
|
83
|
+
|
84
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/bjmllr/ios_parser. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
85
|
+
|
86
|
+
## Copyright and License
|
87
|
+
|
88
|
+
Copyright (C) 2016 Ben Miller
|
89
|
+
|
90
|
+
The gem is available as free software under the terms of the [GNU General Public License, Version 3](http://www.gnu.org/licenses/gpl-3.0.html).
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Build tasks: gem packaging, RSpec, and the native c_lexer extension.
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'
require 'rake/extensiontask'

RSpec::Core::RakeTask.new(:spec)

gemspec = Gem::Specification.load('ios_parser.gemspec')

Rake::ExtensionTask.new do |extension|
  extension.name     = 'c_lexer'
  extension.ext_dir  = 'ext/ios_parser/c_lexer'
  extension.lib_dir  = 'lib/ios_parser'
  extension.gem_spec = gemspec
end

# On JRuby only the specs run by default; everywhere else the extension is
# compiled first.
default_prerequisites = RUBY_ENGINE == 'jruby' ? [:spec] : [:compile, :spec]
task default: default_prerequisites
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Opens an IRB session with the gem already loaded.
require 'bundler/setup'
require 'ios_parser'
require 'irb'

# Fixtures or other setup code can be added here to make experimenting
# easier.
#
# To use Pry instead of IRB, add pry to the Gemfile and replace the
# IRB.start call below with:
#   require "pry"
#   Pry.start

IRB.start
|
data/bin/setup
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
// Lexer state machine. Render the PNG with:
//   dot state_machine.graphviz -Tpng > state_machine.png

digraph g {
  rankdir=LR;
  node [shape=circle];

  // transitions out of ROOT
  LEX_STATE_ROOT -> LEX_STATE_BANNER;
  LEX_STATE_ROOT -> LEX_STATE_CERTIFICATE;
  LEX_STATE_ROOT -> LEX_STATE_COMMENT;
  LEX_STATE_ROOT -> LEX_STATE_INTEGER;
  LEX_STATE_ROOT -> LEX_STATE_WORD;
  LEX_STATE_ROOT -> LEX_STATE_INDENT;

  // transitions out of CERTIFICATE
  LEX_STATE_CERTIFICATE -> LEX_STATE_INDENT;
  LEX_STATE_CERTIFICATE -> LEX_STATE_ROOT;

  // transitions out of INDENT
  LEX_STATE_INDENT -> LEX_STATE_ROOT;
  LEX_STATE_INDENT -> LEX_STATE_BANNER;
  LEX_STATE_INDENT -> LEX_STATE_CERTIFICATE;
  LEX_STATE_INDENT -> LEX_STATE_COMMENT;
  LEX_STATE_INDENT -> LEX_STATE_INTEGER;
  LEX_STATE_INDENT -> LEX_STATE_WORD;

  // transitions out of COMMENT
  LEX_STATE_COMMENT -> LEX_STATE_ROOT;

  // transitions out of INTEGER
  LEX_STATE_INTEGER -> LEX_STATE_DECIMAL;
  LEX_STATE_INTEGER -> LEX_STATE_ROOT;
  LEX_STATE_INTEGER -> LEX_STATE_INDENT;
  LEX_STATE_INTEGER -> LEX_STATE_WORD;

  // transitions out of DECIMAL
  LEX_STATE_DECIMAL -> LEX_STATE_WORD;
  LEX_STATE_DECIMAL -> LEX_STATE_ROOT;
  LEX_STATE_DECIMAL -> LEX_STATE_INDENT;

  // transitions out of WORD
  LEX_STATE_WORD -> LEX_STATE_ROOT;
  LEX_STATE_WORD -> LEX_STATE_INDENT;

  // transitions out of BANNER
  LEX_STATE_BANNER -> LEX_STATE_ROOT;

}
|
Binary file
|
@@ -0,0 +1,507 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
|
3
|
+
/* Ruby handles for the IOSParser module and its CLexer class; both are
 * assigned in Init_c_lexer. */
static VALUE rb_mIOSParser, rb_cCLexer;

/* IOSParser::LexError, raised by call() for an unterminated quoted string.
 * Has external linkage. */
VALUE rb_eLexError;
|
6
|
+
|
7
|
+
/* States of the lexer; call() dispatches on the current state once per
 * input character. */
typedef enum lex_token_state {
    LEX_STATE_ROOT          = 0,
    LEX_STATE_INTEGER       = 1,
    LEX_STATE_DECIMAL       = 2,
    LEX_STATE_QUOTED_STRING = 3,
    LEX_STATE_WORD          = 4,
    LEX_STATE_COMMENT       = 5,
    LEX_STATE_BANNER        = 6,
    LEX_STATE_CERTIFICATE   = 7,
    LEX_STATE_INDENT        = 8,
} lex_token_state;
|
18
|
+
|
19
|
+
struct LexInfo {
|
20
|
+
char *text;
|
21
|
+
size_t pos;
|
22
|
+
size_t token_start;
|
23
|
+
size_t token_length;
|
24
|
+
lex_token_state token_state;
|
25
|
+
VALUE tokens;
|
26
|
+
int indent;
|
27
|
+
int indent_pos;
|
28
|
+
int indents[100];
|
29
|
+
char banner_delimiter;
|
30
|
+
char string_terminator;
|
31
|
+
};
|
32
|
+
typedef struct LexInfo LexInfo;
|
33
|
+
|
34
|
+
/*
 * Character classification helpers.
 *
 * Every argument and every expansion is fully parenthesized so that the
 * macros compose safely with `!`, `&&` and `||` at the call site (the
 * unparenthesized forms silently changed meaning in expressions such as
 * `!IS_DIGIT(c)`). Arguments may be evaluated more than once, so pass only
 * side-effect-free expressions.
 */
#define IS_SPACE(C) ((C) == ' ' || (C) == '\t' || (C) == '\r')
#define IS_NEWLINE(C) ((C) == '\n')
#define IS_COMMENT(C) ((C) == '!')
#define IS_DIGIT(C) ('0' <= (C) && (C) <= '9')
#define IS_DOT(C) ((C) == '.')
#define IS_DECIMAL(C) (IS_DIGIT(C) || IS_DOT(C))
#define IS_LETTER(C) (('a' <= (C) && (C) <= 'z') || ('A' <= (C) && (C) <= 'Z'))
/* NOTE(review): strchr also matches the terminating NUL, so a '\0' byte in
 * the input is classified as punctuation -- confirm whether that is wanted. */
#define IS_PUNCT(C) (strchr("-+$:/,()|*#=<>!\"\\&@;%~{}'\"?[]_^`", (C)) != NULL)
#define IS_WORD(C) (IS_DECIMAL(C) || IS_LETTER(C) || IS_PUNCT(C))
#define IS_LEAD_ZERO(C) ((C) == '0')
#define IS_QUOTE(C) ((C) == '"' || (C) == '\'')
#define IS_LEAD_COMMENT(C) ((C) == '#' || (C) == '!')

/* Character at the lexer's current position. */
#define CURRENT_CHAR(LEX) ((LEX)->text[(LEX)->pos])
/* True when no characters have been collected for the pending token. */
#define TOKEN_EMPTY(LEX) ((LEX)->token_length <= 0)

/* Build a [token_start, TOK] pair / append such a pair to the token list. */
#define MAKE_TOKEN(LEX, TOK) rb_ary_new3(2, rb_int_new((LEX)->token_start), (TOK))
#define ADD_TOKEN(LEX, TOK) rb_ary_push((LEX)->tokens, MAKE_TOKEN(LEX, TOK))

/* Length of a string literal, excluding its terminating NUL. */
#define CMD_LEN(CMD) (sizeof(CMD) - 1)
|
54
|
+
/*
 * Returns 1 when the token six places from the end of the token list is the
 * symbol :INDENT and the token five places from the end is the string
 * "certificate"; returns 0 otherwise (including when fewer than six tokens
 * have been emitted).
 */
int is_certificate(LexInfo *lex) {
    const int count = RARRAY_LEN(lex->tokens);
    VALUE pair, value;

    /* Both look-behind slots must exist; count - 6 >= 0 implies count - 5 >= 0. */
    if (count - 6 < 0) {
        return 0;
    }

    pair = rb_ary_entry(lex->tokens, count - 6);
    value = rb_ary_entry(pair, 1);
    if (TYPE(value) != T_SYMBOL) {
        return 0;
    }
    if (rb_intern("INDENT") != SYM2ID(value)) {
        return 0;
    }

    pair = rb_ary_entry(lex->tokens, count - 5);
    if (TYPE(pair) != T_ARRAY || RARRAY_LEN(pair) < 2) {
        return 0;
    }

    value = rb_ary_entry(pair, 1);
    if (TYPE(value) != T_STRING) {
        return 0;
    }

    StringValue(value);
    if (RSTRING_LEN(value) != CMD_LEN("certificate")) {
        return 0;
    }

    return 0 == strncmp(RSTRING_PTR(value), "certificate", 11) ? 1 : 0;
}
|
83
|
+
|
84
|
+
/*
 * Returns 1 when the second-to-last emitted token is the string "banner",
 * 0 otherwise (including when fewer than two tokens have been emitted).
 */
int is_banner_begin(LexInfo *lex) {
    const int count = RARRAY_LEN(lex->tokens);
    VALUE pair, word;

    if (count - 2 < 0) {
        return 0;
    }

    pair = rb_ary_entry(lex->tokens, count - 2);
    word = rb_ary_entry(pair, 1);
    if (TYPE(word) != T_STRING) {
        return 0;
    }

    StringValue(word);
    if (RSTRING_LEN(word) != CMD_LEN("banner")) {
        return 0;
    }

    return 0 == strncmp(RSTRING_PTR(word), "banner", 6) ? 1 : 0;
}
|
101
|
+
|
102
|
+
/*
 * Commit the pending token (text[token_start, token_length]) to the token
 * list and return the lexer to LEX_STATE_ROOT.
 *
 * - An empty pending token only resets the state.
 * - Word, quoted-string, banner and certificate tokens become Ruby Strings.
 * - Integer tokens become Ruby Integers. The conversion goes through
 *   rb_str_to_inum so arbitrarily long digit runs do not overflow (the
 *   previous atoi() call had undefined behavior on overflow).
 * - Decimal tokens become Ruby Floats.
 * - A pending comment is discarded; note that token_length is left
 *   untouched in that case.
 *
 * Raises RuntimeError if the current state cannot produce a token.
 *
 * No stack buffer sized by the token is used: the token bytes are copied
 * into a Ruby String, which is NUL-terminated, so large banner tokens cannot
 * exhaust the C stack.
 */
static void delimit(LexInfo *lex) {
    const char *start;
    VALUE text;
    VALUE token;

    if (TOKEN_EMPTY(lex)) {
        lex->token_state = LEX_STATE_ROOT;
        return;
    }

    start = &lex->text[lex->token_start];

    switch (lex->token_state) {
    case (LEX_STATE_QUOTED_STRING):
    case (LEX_STATE_WORD):
    case (LEX_STATE_BANNER):
    case (LEX_STATE_CERTIFICATE):
        token = rb_str_new(start, lex->token_length);
        break;

    case (LEX_STATE_INTEGER):
        text = rb_str_new(start, lex->token_length);
        token = rb_str_to_inum(text, 10, 0);
        break;

    case (LEX_STATE_DECIMAL):
        text = rb_str_new(start, lex->token_length);
        /* No Ruby allocation happens between creating text and parsing it,
         * so the buffer is stable while strtod reads it. */
        token = rb_float_new(strtod(RSTRING_PTR(text), NULL));
        break;

    case (LEX_STATE_COMMENT):
        lex->token_state = LEX_STATE_ROOT;
        return;

    default:
        /* Print the token straight from the input; %.*s bounds the read. */
        rb_raise(rb_eRuntimeError,
                 "Unable to commit token %.*s at %d",
                 (int)lex->token_length, start, (int)lex->pos);
        return;
    }

    ADD_TOKEN(lex, token);
    lex->token_state = LEX_STATE_ROOT;
    lex->token_length = 0;
}
|
146
|
+
|
147
|
+
static void deallocate(void * lex) {
|
148
|
+
xfree(lex);
|
149
|
+
}
|
150
|
+
|
151
|
+
static void mark(void *ptr) {
|
152
|
+
LexInfo *lex = (LexInfo *)ptr;
|
153
|
+
rb_gc_mark(lex->tokens);
|
154
|
+
}
|
155
|
+
|
156
|
+
static VALUE allocate(VALUE klass) {
|
157
|
+
LexInfo * lex = ALLOC(LexInfo);
|
158
|
+
return Data_Wrap_Struct(klass, mark, deallocate, lex);
|
159
|
+
}
|
160
|
+
|
161
|
+
/*
 * CLexer#initialize: reset every field of the wrapped LexInfo and create an
 * empty token array.
 *
 * The method is registered with arity 0, so it receives only the receiver;
 * the C signature now matches that registration instead of declaring an
 * extra parameter that is never passed. The banner delimiter and string
 * terminator are also cleared so no field starts out indeterminate.
 *
 * Returns self.
 */
static VALUE initialize(VALUE self) {
    LexInfo *lex;
    Data_Get_Struct(self, LexInfo, lex);

    lex->text = NULL;
    lex->pos = 0;
    lex->token_start = 0;
    lex->token_length = 0;
    lex->token_state = LEX_STATE_ROOT;
    lex->tokens = rb_ary_new();

    lex->indent = 0;
    lex->indent_pos = 0;
    lex->indents[0] = 0;

    lex->banner_delimiter = '\0';
    lex->string_terminator = '\0';

    return self;
}
|
178
|
+
|
179
|
+
static void process_root(LexInfo * lex);
|
180
|
+
static void process_start_of_line(LexInfo * lex);
|
181
|
+
static void start_banner(LexInfo * lex);
|
182
|
+
|
183
|
+
/*
 * Handle a newline: commit the pending token, then either open a banner
 * (when the token before the one just committed is "banner") or emit :EOL
 * and switch to indentation counting for the next line.
 */
static void process_newline(LexInfo *lex) {
    delimit(lex);

    if (!is_banner_begin(lex)) {
        lex->token_start = lex->pos;
        ADD_TOKEN(lex, ID2SYM(rb_intern("EOL")));
        lex->token_state = LEX_STATE_INDENT;
        lex->indent = 0;
        return;
    }

    lex->token_state = LEX_STATE_BANNER;
    start_banner(lex);

    /* The banner text starts on the character after the current one. */
    lex->pos += 1;
    lex->token_start = lex->pos;
    lex->token_length = 0;
}
|
200
|
+
|
201
|
+
/* Whitespace ends whatever token is pending: commit it. */
static void process_space(LexInfo *lex)
{
    delimit(lex);
}
|
204
|
+
|
205
|
+
/*
 * Skip comment characters. Nothing happens until a newline is reached; at
 * that point the pending token is committed and the lexer starts counting
 * indentation for the next line.
 */
static void process_comment(LexInfo *lex) {
    const char current = CURRENT_CHAR(lex);

    if (current != '\n') {
        return;
    }

    delimit(lex);
    lex->token_state = LEX_STATE_INDENT;
    lex->indent = 0;
}
|
214
|
+
|
215
|
+
/*
 * Consume one character of a quoted string. The first character seen (the
 * opening quote) is remembered as the terminator; the token is committed,
 * quotes included, when that same character is seen again.
 */
static void process_quoted_string(LexInfo *lex) {
    const char current = CURRENT_CHAR(lex);

    lex->token_length += 1;

    if ('\0' == lex->string_terminator) {
        lex->string_terminator = current;
        return;
    }

    if (lex->string_terminator == current) {
        delimit(lex);
    }
}
|
225
|
+
|
226
|
+
/*
 * Consume one character while inside a word: word characters extend the
 * token, whitespace and newlines end it. Any other character is ignored.
 */
static void process_word(LexInfo *lex) {
    const char current = CURRENT_CHAR(lex);

    if (IS_WORD(current)) {
        lex->token_length += 1;
        return;
    }

    if (IS_SPACE(current)) {
        process_space(lex);
        return;
    }

    if (IS_NEWLINE(current)) {
        process_newline(lex);
    }
}
|
237
|
+
|
238
|
+
/*
 * Consume one character while inside a decimal number. Digits extend the
 * number; any other word character extends the token but demotes it to a
 * plain word; whitespace and newlines end the token.
 */
static void process_decimal(LexInfo *lex) {
    const char current = CURRENT_CHAR(lex);

    /* Digits are word characters too, so both cases grow the token. */
    if (IS_WORD(current)) {
        lex->token_length += 1;
        if (current < '0' || current > '9') {
            lex->token_state = LEX_STATE_WORD;
        }
        return;
    }

    if (IS_SPACE(current)) {
        process_space(lex);
        return;
    }

    if (IS_NEWLINE(current)) {
        process_newline(lex);
    }
}
|
252
|
+
|
253
|
+
/*
 * Consume one character while inside an integer. Digits extend it, a dot
 * promotes it to a decimal, whitespace and newlines end it, and any other
 * word character turns the token into a word.
 */
static void process_integer(LexInfo *lex) {
    const char current = CURRENT_CHAR(lex);

    if (IS_DIGIT(current)) {
        lex->token_length += 1;
        return;
    }

    if ('.' == current) {
        lex->token_length += 1;
        lex->token_state = LEX_STATE_DECIMAL;
        return;
    }

    if (IS_SPACE(current)) {
        process_space(lex);
        return;
    }

    if (IS_NEWLINE(current)) {
        process_newline(lex);
        return;
    }

    if (IS_WORD(current)) {
        /* process_word appends the character; the state change follows. */
        process_word(lex);
        lex->token_state = LEX_STATE_WORD;
    }
}
|
270
|
+
|
271
|
+
/*
 * Consume one character of a certificate body.
 *
 * The body ends once the five characters immediately before the current
 * position are "quit\n". At that point the collected text, minus the
 * terminator and any spaces directly before it, is emitted as a String with
 * newlines removed and double spaces collapsed, followed by
 * :CERTIFICATE_END; the current character is then processed as a newline
 * and as the start of a line. Otherwise the character is added to the body.
 *
 * Fixes relative to the previous version:
 * - the look-behind is skipped when fewer than five characters precede the
 *   current position, instead of reading before the start of the input;
 * - trimming trailing spaces can no longer walk past the start of the token,
 *   and a body shorter than the terminator yields an empty string;
 * - the second substitution collapses two spaces into one (replacing a
 *   single space with a single space did nothing).
 */
static void process_certificate(LexInfo *lex) {
    static const char terminator[] = "quit\n";
    const size_t terminator_len = CMD_LEN(terminator);

    if (lex->pos >= terminator_len &&
        0 == memcmp(&lex->text[lex->pos - terminator_len],
                    terminator, terminator_len)) {
        long length = (long)lex->token_length - (long)terminator_len;
        VALUE token;

        if (length < 0) {
            length = 0;
        }
        while (length > 0 &&
               ' ' == lex->text[lex->token_start + length - 1]) {
            length--;
        }
        lex->token_length = (size_t)length;

        token = rb_str_new(&lex->text[lex->token_start], lex->token_length);

        rb_funcall(token, rb_intern("gsub!"), 2,
                   rb_str_new2("\n"), rb_str_new2(""));

        rb_funcall(token, rb_intern("gsub!"), 2,
                   rb_str_new2("  "), rb_str_new2(" "));

        ADD_TOKEN(lex, token);
        lex->token_length = 0;

        lex->token_start = lex->pos;
        ADD_TOKEN(lex, ID2SYM(rb_intern("CERTIFICATE_END")));

        process_newline(lex);
        process_start_of_line(lex);
    } else {
        lex->token_length++;
    }
}
|
306
|
+
|
307
|
+
/*
 * Enter certificate mode: close one indentation level, discard the two most
 * recently emitted tokens, emit :CERTIFICATE_BEGIN and hand the current
 * character to process_certificate.
 */
static void start_certificate(LexInfo *lex) {
    int dropped;

    lex->indent_pos -= 1;

    for (dropped = 0; dropped < 2; dropped++) {
        rb_ary_pop(lex->tokens);
    }

    ADD_TOKEN(lex, ID2SYM(rb_intern("CERTIFICATE_BEGIN")));
    process_certificate(lex);
}
|
314
|
+
|
315
|
+
/*
 * Returns 1 when the current character is the banner delimiter and it sits
 * next to a newline (directly after one, or directly before one); returns 0
 * otherwise.
 */
int is_banner_end_char(LexInfo *lex) {
    const size_t here = lex->pos;

    if (CURRENT_CHAR(lex) != lex->banner_delimiter) {
        return 0;
    }

    if (0 < here && '\n' == lex->text[here - 1]) {
        return 1;
    }

    return '\n' == lex->text[here + 1] ? 1 : 0;
}
|
320
|
+
|
321
|
+
/*
 * Returns nonzero when the current character completes the sequence
 * "\nEOF", i.e. the banner-ending string "EOF" at the start of a line.
 * "EOF" is the only ending string that is accepted.
 *
 * The look-behind is only performed when at least three characters precede
 * the current position, so the input is never read before its start.
 */
int is_banner_end_string(LexInfo *lex) {
    return (lex->pos >= 3 &&
            CURRENT_CHAR(lex) == 'F' &&
            lex->text[lex->pos - 1] == 'O' &&
            lex->text[lex->pos - 2] == 'E' &&
            lex->text[lex->pos - 3] == '\n');
}
|
328
|
+
|
329
|
+
/*
 * Consume one character of banner text. When the banner terminator is
 * reached (the delimiter character, or the string "EOF" when no delimiter
 * was recorded) the collected text is committed and :BANNER_END is emitted;
 * otherwise the character is added to the banner.
 */
static void process_banner(LexInfo *lex) {
    if (lex->banner_delimiter) {
        if (is_banner_end_char(lex)) {
            lex->token_length += 1;
            delimit(lex);
            lex->token_start = lex->pos;
            ADD_TOKEN(lex, ID2SYM(rb_intern("BANNER_END")));
            /* Skip one extra character when the delimiter is followed by 'C'. */
            if ('C' == lex->text[lex->pos + 1]) {
                lex->pos += 1;
            }
            return;
        }
    } else if (is_banner_end_string(lex)) {
        lex->token_length -= 1;
        delimit(lex);
        lex->token_start = lex->pos;
        ADD_TOKEN(lex, ID2SYM(rb_intern("BANNER_END")));
        return;
    }

    lex->token_length += 1;
}
|
345
|
+
|
346
|
+
/*
 * Begin a banner: record the current character as the delimiter (or 0 when
 * it is a newline, meaning the banner ends with "EOF"), emit :BANNER_BEGIN,
 * and advance one extra position when the character two places ahead is a
 * newline.
 */
static void start_banner(LexInfo *lex) {
    const char first = CURRENT_CHAR(lex);

    if ('\n' == first) {
        lex->banner_delimiter = 0;
    } else {
        lex->banner_delimiter = first;
    }

    ADD_TOKEN(lex, ID2SYM(rb_intern("BANNER_BEGIN")));

    if (lex->text[lex->pos + 2] == '\n') {
        lex->pos += 1;
    }
}
|
352
|
+
|
353
|
+
/*
 * Handle a character at the start of a line.
 *
 * Whitespace (space, tab or carriage return) only increases the indentation
 * count. The first other character settles the line's indentation: a deeper
 * indent than the innermost open level emits :INDENT and opens a new level,
 * otherwise :DEDENT is emitted for every open level that the line has left.
 * The character is then treated as the start of a comment ('#' or '!') or
 * passed to process_root.
 *
 * Raises IOSParser::LexError when opening another level would exceed the
 * capacity of the indentation stack; previously this wrote past the end of
 * the indents array.
 */
static void process_start_of_line(LexInfo *lex) {
    char c = CURRENT_CHAR(lex);

    if (IS_SPACE(c)) {
        lex->indent++;
        return;
    }

    if (lex->indent > lex->indents[lex->indent_pos]) {
        const int capacity =
            (int)(sizeof(lex->indents) / sizeof(lex->indents[0]));

        if (lex->indent_pos + 1 >= capacity) {
            rb_raise(rb_eLexError,
                     "Indentation nested deeper than %d levels at %d",
                     capacity - 1, (int)lex->pos);
        }

        lex->token_start = lex->pos;
        ADD_TOKEN(lex, ID2SYM(rb_intern("INDENT")));
        lex->indent_pos++;
        lex->indents[lex->indent_pos] = lex->indent;
    } else {
        while (lex->indent_pos >= 1 &&
               lex->indent <= lex->indents[lex->indent_pos-1]) {
            ADD_TOKEN(lex, ID2SYM(rb_intern("DEDENT")));
            lex->indent_pos--;
        }
    }

    if (IS_LEAD_COMMENT(c)) {
        lex->token_state = LEX_STATE_COMMENT;
    } else {
        process_root(lex);
    }
}
|
380
|
+
|
381
|
+
/*
 * Classify the current character when no token is in progress and switch to
 * the matching state. The checks run in a fixed order: whitespace, banner
 * start, certificate start, newline, comment, integer, quoted string, word.
 * Raises TypeError for a character that fits none of them.
 */
static void process_root(LexInfo *lex) {
    const char current = CURRENT_CHAR(lex);

    lex->token_start = lex->pos;

    if (IS_SPACE(current)) {
        delimit(lex);
        return;
    }

    if (is_banner_begin(lex)) {
        lex->token_state = LEX_STATE_BANNER;
        start_banner(lex);
        lex->pos += 2;
        lex->token_start = lex->pos;
        lex->token_length = 0;
        return;
    }

    if (is_certificate(lex)) {
        lex->token_state = LEX_STATE_CERTIFICATE;
        start_certificate(lex);
        return;
    }

    if (IS_NEWLINE(current)) {
        process_newline(lex);
        return;
    }

    if (IS_COMMENT(current)) {
        lex->token_state = LEX_STATE_COMMENT;
        process_comment(lex);
        return;
    }

    /* A digit other than a leading zero starts an integer. */
    if ('1' <= current && current <= '9') {
        lex->token_state = LEX_STATE_INTEGER;
        process_integer(lex);
        return;
    }

    if (IS_QUOTE(current)) {
        lex->token_state = LEX_STATE_QUOTED_STRING;
        lex->string_terminator = '\0';
        process_quoted_string(lex);
        return;
    }

    if (IS_WORD(current)) {
        lex->token_state = LEX_STATE_WORD;
        process_word(lex);
        return;
    }

    rb_raise(rb_eTypeError,
             "Attempted to lex unknown character %c at %d",
             current, (int)lex->pos);
}
|
426
|
+
|
427
|
+
/*
 * CLexer#call(input_text): run the state machine over every byte of
 * input_text and return the token array.
 *
 * Each token is a [position, value] pair. After the last character the
 * pending token is committed and one :DEDENT is emitted for every
 * indentation level that is still open.
 *
 * Raises TypeError when input_text is not a String, and
 * IOSParser::LexError when the input ends inside a quoted string.
 *
 * The lexer reads the String's buffer through a raw pointer while Ruby
 * objects are being allocated, so RB_GC_GUARD keeps input_text referenced
 * until lexing has finished.
 */
static VALUE call(VALUE self, VALUE input_text) {
    LexInfo *lex;
    size_t input_len;

    if (TYPE(input_text) != T_STRING) {
        /* rb_raise does not return. */
        rb_raise(rb_eTypeError, "The argument to CLexer#call must be a String.");
    }

    Data_Get_Struct(self, LexInfo, lex);

    lex->text = RSTRING_PTR(input_text);
    input_len = RSTRING_LEN(input_text);

    for (lex->pos = 0; lex->pos < input_len; lex->pos++) {
        switch(lex->token_state) {
        case (LEX_STATE_ROOT):
            process_root(lex);
            break;

        case (LEX_STATE_INDENT):
            process_start_of_line(lex);
            break;

        case (LEX_STATE_INTEGER):
            process_integer(lex);
            break;

        case (LEX_STATE_DECIMAL):
            process_decimal(lex);
            break;

        case (LEX_STATE_QUOTED_STRING):
            process_quoted_string(lex);
            break;

        case (LEX_STATE_WORD):
            process_word(lex);
            break;

        case (LEX_STATE_COMMENT):
            process_comment(lex);
            break;

        case (LEX_STATE_BANNER):
            process_banner(lex);
            break;

        case (LEX_STATE_CERTIFICATE):
            process_certificate(lex);
            break;

        default:
            /* Every lex_token_state value is handled above. */
            break;
        }
    }

    if (lex->token_state == LEX_STATE_QUOTED_STRING) {
        rb_raise(rb_eLexError,
                 "Unterminated quoted string starting at %d: %.*s",
                 (int)lex->token_start,
                 (int)lex->token_length, &lex->text[lex->token_start]);
    }

    delimit(lex);
    lex->token_start = lex->pos;

    for (; lex->indent_pos > 0; lex->indent_pos--) {
        ADD_TOKEN(lex, ID2SYM(rb_intern("DEDENT")));
    }

    RB_GC_GUARD(input_text);
    return lex->tokens;
}
|
498
|
+
|
499
|
+
void Init_c_lexer() {
|
500
|
+
rb_mIOSParser = rb_define_module("IOSParser");
|
501
|
+
rb_cCLexer = rb_define_class_under(rb_mIOSParser, "CLexer", rb_cObject);
|
502
|
+
rb_eLexError = rb_define_class_under(rb_mIOSParser, "LexError",
|
503
|
+
rb_eStandardError);
|
504
|
+
rb_define_alloc_func(rb_cCLexer, allocate);
|
505
|
+
rb_define_method(rb_cCLexer, "initialize", initialize, 0);
|
506
|
+
rb_define_method(rb_cCLexer, "call", call, 1);
|
507
|
+
}
|