peglite 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gemspec ADDED
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
+
3
# Gem specification for PegLite. The result is kept in the GemSpec constant so
# that other scripts which `load` this file can read the name, version and
# description from it.
GemSpec = Gem::Specification.new do |spec|
  spec.name = 'peglite'
  spec.version = '0.0.1'
  spec.license = 'MIT'
  spec.required_ruby_version = '>= 1.9.1'

  spec.authors << 'Ingy döt Net'
  spec.email = 'ingy@ingy.net'
  spec.summary = 'Simple PEG Parsing Framework'
  spec.description = <<-'.'
PegLite is a very simple framework for creating your own PEG parsers.
.
  spec.homepage = 'http://pegex.org'

  # Package exactly the files tracked by git, one path per output line.
  spec.files = `git ls-files`.each_line.map(&:chomp)
end
data/CHANGELOG.yaml ADDED
@@ -0,0 +1,3 @@
1
+ - version: 0.0.1
2
+ date: Wed Jan 2 14:51:35 PST 2013
3
+ changes: Ported from Pegex.pm CPAN module.
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ (The MIT License)
2
+
3
+ Copyright © 2013 Ingy döt Net
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the ‘Software’), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9
+ of the Software, and to permit persons to whom the Software is furnished to do
10
+ so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED ‘AS IS’, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,38 @@
1
+ = PegLite - Simple PEG Parsing Framework
2
+
3
+ PegLite is a very easy way to make a PEG parser by simply writing a PegLite
4
+ subclass and a few regexes.
5
+
6
+ = Synopsis
7
+
8
+ require 'peglite'
9
+ class MyParser < PegLite
10
+ def top
11
+ match rule "( comment | assignment )*"
12
+ end
13
+ token comment: /\s*#.*\s*\n/
14
+ def assignment
15
+ m = match "id WS* EQUAL WS* value WS* EOL"
16
+ return { 'var' => m[0], 'value' => m[1] }
17
+ end
18
+ end
19
+
20
+ puts YAML.dump MyParser.new.parse <<'...'
21
+ # This is a comment. Below is an assignment statement:
22
+ foo = 123
23
+
24
+ # A string assignment:
25
+ bar = 'O HAI'
26
+ ...
27
+
28
+ The above program defines a parser, uses it to parse a text into a data tree,
29
+ and then prints the tree in YAML:
30
+
31
+ - var: foo
32
+ value: 123
33
+ - var: bar
34
+ value: O HAI
35
+
36
+ Pretty simple, right?
37
+
38
+
data/Rakefile ADDED
@@ -0,0 +1,67 @@
1
# Load Gem constants from the gemspec
GemSpecFile = '.gemspec'
load GemSpecFile
GemName = GemSpec.name
GemVersion = GemSpec.version
GemDir = "#{GemName}-#{GemVersion}"
GemFile = "#{GemDir}.gem"
DevNull = '2>/dev/null'

# Require the Rake libraries
require 'rake'
require 'rake/testtask'
require 'rake/clean'
# File.exist? is used because File.exists? is deprecated and no longer
# available on current Ruby versions.
require 'rake/testml' if File.exist? 'test/testml.yaml'

task :default => 'help'

CLEAN.include GemDir, GemFile, 'data.tar.gz', 'metadata.gz'

# Rake::TestTask.new *defines* the :test task, so it has to run while the
# Rakefile is being loaded. Creating it inside the body of a `task :test`
# block only registered the test action after :test had already started
# executing, which meant the tests themselves were never run.
Rake::TestTask.new(:test) do |t|
  t.verbose = true
  t.test_files = FileList['test/*.rb']
end

desc 'Build the gem'
task :build => [:clean, :test] do
  sh "gem build #{GemSpecFile}"
end

desc 'Install the gem'
task :install => [:build] do
  sh "gem install #{GemFile}"
end

desc 'Build, unpack and inspect the gem'
task :distdir => [:build] do
  # A .gem file is a tar archive that contains data.tar.gz and metadata.gz.
  sh "tar xf #{GemFile} #{DevNull}"
  Dir.mkdir GemDir
  Dir.chdir GemDir
  sh "tar xzf ../data.tar.gz #{DevNull}"
  puts "\n>>> Entering sub-shell for #{GemDir}..."
  system ENV['SHELL']
end

desc 'Build and push the gem'
task :release => [:build] do
  sh "gem push #{GemFile}"
end

desc 'Print a description of the gem'
task :desc do
  puts "Gem: '#{GemName}' (version #{GemVersion})"
  puts
  # Parentheses avoid Ruby's "ambiguous first argument" warning for /^/.
  puts GemSpec.description.gsub(/^/, ' ')
end

desc 'List the Rakefile tasks'
task :help do
  puts 'The following rake tasks are available:'
  puts
  puts `rake -T`.gsub(/^/, ' ')
end
data/lib/peglite/compiler.rb ADDED
@@ -0,0 +1,125 @@
1
class PegLite; end

#------------------------------------------------------------------------------
# Compiles a PegLite rule string such as "( comment | assignment )*" or
# "city COMMA _ state" into a tree of Hashes. Every node has the keys:
#
#   'type' - 'ref' (reference to a named rule), 'all' (sequence) or
#            'any' (ordered choice)
#   'rule' - the referenced rule name (String) for 'ref', otherwise an Array
#            of child nodes
#   'min'  - minimum number of repetitions (0 or 1)
#   'max'  - maximum number of repetitions (1, or 0 meaning "no upper limit")
#
# Every syntax error raises RuntimeError.
class PegLite::Compiler
  # Token patterns, tried in order. Patterns without a capture group (the
  # whitespace pattern) consume input without producing a token.
  TOKEN_PATTERNS = [
    /\A\s+/,
    /\A(\()/,
    /\A(\w+[\?\*\+]?)/,
    /\A(\|)/,
    /\A(\)[\?\*\+]?)/,
  ].freeze

  # peglite_rule - the rule text (String) to compile. It is tokenized
  #                immediately; the text itself is not modified.
  def initialize peglite_rule
    tokenize peglite_rule
  end

  # Compile the whole token stream and return the root node.
  #
  # A rule may be a single reference ("word"), a sequence ("a b c"), a choice
  # ("a | b") or any of these mixed with parenthesized, optionally quantified
  # groups. A rule consisting of exactly one group returns that group's node
  # directly. Fails on an empty rule and on tokens left over after the
  # expression (for example an unbalanced ')').
  def compile
    fail "Can't compile an empty rule" if @tokens.empty?
    got = compile_sequence
    fail "Unexpected '#{@tokens[0]}' in rule" unless @tokens.empty?
    return got
  end

  # Compile a sequence of items up to the end of input or the next ')'
  # (which is left in the token stream) into an 'all' node.
  def compile_all
    fail "Unexpected end of rule" if @tokens.empty?
    all = []
    all.push compile_item until sequence_done?
    return {
      'type' => 'all',
      'rule' => all,
      'min' => 1,
      'max' => 1,
    }
  end

  # Compile items separated by '|' up to the end of input or the next ')'
  # (which is left in the token stream) into an 'any' node. Mixing sequence
  # and choice at the same level ("a | b c") is an error.
  def compile_any
    fail "Unexpected end of rule" if @tokens.empty?
    any = [ compile_item ]
    while @tokens[0] == '|'
      @tokens.shift
      any.push compile_item
    end
    fail "Expected '|' or ')' but found '#{@tokens[0]}'" unless sequence_done?
    return {
      'type' => 'any',
      'rule' => any,
      'min' => 1,
      'max' => 1,
    }
  end

  # Consume one reference token such as "name", "name?", "name*" or "name+"
  # and return the corresponding 'ref' node.
  def compile_ref
    fail "Unexpected end of rule" if @tokens.empty?
    token = @tokens.shift
    token.match(/^(\w+)([\?\*\+]?)$/) or fail "Bad rule reference '#{token}'"
    rule, quantifier = $1, $2
    ref = {
      'type' => 'ref',
      'rule' => rule,
    }
    return ref.merge! compile_limits(quantifier)
  end

  # Translate a quantifier ('?', '*', '+' or anything else for "exactly one")
  # into its 'min'/'max' pair. A 'max' of 0 means unbounded.
  def compile_limits quantifier
    case quantifier
    when '?'; { 'min' => 0, 'max' => 1 }
    when '*'; { 'min' => 0, 'max' => 0 }
    when '+'; { 'min' => 1, 'max' => 0 }
    else; { 'min' => 1, 'max' => 1 }
    end
  end

  # Split text into @tokens. Works on a copy so the caller's string is left
  # untouched.
  def tokenize text
    input = text.clone
    @tokens = []
    while (token = get_token input)
      @tokens.concat token
    end
  end

  # Remove the next token from the front of input (mutating it) and return it
  # wrapped in an Array. Returns an empty Array when only whitespace was
  # consumed and nil when input is exhausted. Fails on unrecognized text.
  def get_token input
    return if input.empty?
    TOKEN_PATTERNS.each do |pattern|
      if (m = input.match(pattern))
        input.slice!(0, m[0].length)
        return m.captures
      end
    end
    fail "Failed to find next token in '#{input}'"
  end

  private

  # Compile one level of the expression: everything up to the end of input or
  # the ')' that closes the enclosing group.
  def compile_sequence
    fail "Empty rule or group" if sequence_done?
    # The token right after the first item decides between choice and
    # sequence. The first item may be a whole parenthesized group, so its
    # length has to be measured rather than assumed to be one token.
    got = @tokens[first_item_size] == '|' ? compile_any : compile_all
    items = got['rule']
    # A lone group stands for itself instead of being wrapped in a
    # single-element sequence.
    return items[0] if items.size == 1 and items[0]['type'] != 'ref'
    return got
  end

  # Compile a single item: a parenthesized group or a rule reference.
  def compile_item
    fail "Unexpected end of rule" if @tokens.empty?
    return compile_group if @tokens[0] == '('
    return compile_ref if @tokens[0].match(/^\w/)
    fail "Unexpected '#{@tokens[0]}' in rule"
  end

  # Compile "( ... )" plus its optional quantifier.
  def compile_group
    @tokens.shift
    got = compile_sequence
    close = @tokens.shift
    fail "Missing ')' in rule" unless close and close.match(/^\)([\?\*\+]?)$/)
    quantifier = $1
    # If the content is itself a quantified group, e.g. "( ( a | b )* )",
    # wrap it so that this group's limits do not overwrite the inner ones.
    unless got['min'] == 1 and got['max'] == 1
      got = { 'type' => 'all', 'rule' => [ got ], 'min' => 1, 'max' => 1 }
    end
    return got.merge! compile_limits(quantifier)
  end

  # True when the current level has no more items to read.
  def sequence_done?
    @tokens.empty? or !!@tokens[0].match(/^\)/)
  end

  # Number of tokens taken up by the first item of the stream: 1 for a
  # reference, or the distance to the matching ')' for a group.
  def first_item_size
    return 1 unless @tokens[0] == '('
    depth = 0
    @tokens.each_with_index do |token, index|
      depth += 1 if token == '('
      depth -= 1 if token.match(/^\)/)
      return index + 1 if depth == 0
    end
    fail "Missing ')' in rule"
  end
end
data/lib/peglite.rb ADDED
@@ -0,0 +1,202 @@
1
+ require 'peglite/compiler'
2
+
3
require 'yaml'

# Debugging aid: print each argument as YAML, print the location of the
# caller, then terminate the program.
def XXX *args
  args.each do |arg|
    puts YAML.dump arg
  end
  puts caller.first
  exit
end

# Debugging aid: print each argument as YAML and the location of the caller,
# then hand back the first argument so the call can be dropped into the
# middle of an expression.
def YYY *args
  args.each do |arg|
    puts YAML.dump arg
  end
  puts caller.first
  return args[0]
end
7
+
8
+ #------------------------------------------------------------------------------
9
# Base class for PegLite parsers. Subclasses declare rules with the class
# method `rule` and/or define instance methods named after rules, then call
# `parse` on an instance.
#
# NOTE: rules are stored in the global $PegLiteRules, so they are shared by
# every PegLite subclass loaded into the process. $PegLiteTopRule remembers
# the first rule declared after this class body has finished loading.
class PegLite
  $PegLiteRules = {} # XXX global variable smell

  # Declare a named rule.
  #
  # args - a one-entry Hash, { name => rule }. A Regexp rule must begin with
  #        '\A'; a String rule is compiled by PegLite::Compiler.
  #
  # Raises RuntimeError for a Regexp that is not anchored with '\A' and for
  # any rule that is neither a Regexp nor a String.
  def self.rule args
    name, rule = args.first
    name = name.to_s
    $PegLiteTopRule ||= name
    if rule.kind_of? Regexp
      # Inspect the pattern source rather than Regexp#to_s: to_s embeds the
      # option flags, so the old check rejected anchored patterns that
      # carried a flag such as /i.
      fail "Regexp for '#{name}' must begin with '\\A'" \
        unless rule.source.start_with?('\A')
      $PegLiteRules[name] = {
        'type' => 'rgx',
        'rule' => rule,
        'min' => 1,
        'max' => 1,
      }
    elsif rule.kind_of? String
      $PegLiteRules[name] = PegLite::Compiler.new(rule).compile
    else
      fail "Don't know how to make rule '#{name}' from '#{rule}'"
    end
  end

  # Built-in rules available to every parser.
  rule _: (/\A\s*/)
  rule __: (/\A\s+/)
  rule EQUAL: (/\A=/)
  rule COMMA: (/\A,/)
  rule NL: (/\A\n/)
  # The built-ins above must not count as a grammar's top rule.
  $PegLiteTopRule = nil

  attr_accessor :got
  attr_accessor :wrap
  attr_accessor :debug

  # attrs - Hash of attribute names to initial values; each entry is applied
  #         through the matching writer (got=, wrap=, debug=).
  #
  # Yields the new parser when a block is given.
  def initialize attrs={}
    @got = nil
    @wrap = false
    @debug = false

    attrs.each { |k,v| self.send "#{k}=", v }

    @pos = 0
    @far = 0
    @rules = $PegLiteRules
    yield self if block_given?
  end

  # Parse input starting from the rule named top.
  #
  # Returns @got when it has been set, otherwise the first element of the top
  # rule's match. Raises RuntimeError when input is not a String and
  # PegexParseError when the input does not match or is not fully consumed.
  def parse input, top=($PegLiteTopRule || 'top')
    fail "PegLite parse() method requires an input string" \
      unless input.kind_of? String
    @input = input
    # Start every parse from the beginning so a parser object can be reused.
    @pos = 0
    @far = 0
    got = match_ref top
    failure if @pos < @input.length
    return @got if @got
    # A failed match of empty input leaves nothing unconsumed; report it as a
    # parse error instead of calling [] on nil.
    failure unless got
    return got[0]
  end

  # Match a rule at the current position, honouring its 'min'/'max' limits.
  #
  # rule - a compiled rule Hash, the name of a rule, or nil to use the rule
  #        named after the calling method.
  #
  # Returns an Array of results, or nil (with the position restored) when the
  # rule does not match often enough.
  def match rule=nil
    if not rule.kind_of? Hash
      # The last word of the caller's backtrace entry is the method name.
      rule ||= caller.first.scan(/(\w+)/).last.first
      rule_name = rule
      rule = @rules[rule] if rule.kind_of? String
      fail "Can't find rule for '#{rule_name}'" unless rule.kind_of? Hash
    end

    type, child, min, max = rule.values_at('type', 'rule', 'min', 'max')
    pos, count, matched = @pos, 0, []

    while (result = send("match_#{type}", child))
      advanced = @pos > pos
      pos = @pos
      count += 1
      if result.kind_of? Array
        matched.concat result
      else
        matched << result
      end
      # A repetition that consumed nothing would succeed again forever
      # (e.g. `_*`, where `_` can match the empty string), so count it once
      # and stop.
      break if max == 1 or not advanced
    end

    if count >= min and (max == 0 or count <= max)
      return matched
    else
      @pos = pos
      return
    end
  end

  # Match every element of all in order. Returns the collected results
  # (nested in one more Array when more than one element matched) or nil,
  # restoring the position, as soon as one element fails.
  def match_all all
    pos, set, count = @pos, [], 0
    all.each do |elem|
      if (m = match elem)
        set.concat m
        count += 1
      else
        if (@pos = pos) > @far
          @far = pos
        end
        return
      end
    end
    set = [ set ] if count > 1
    return set
  end

  # Return the result of the first element of any that matches, or nil.
  def match_any any
    any.each do |elem|
      if (m = match elem)
        return m
      end
    end
    return
  end

  # TODO move trace/debug out of default match_ref method
  #
  # Match the rule named ref: a method of that name (private ones included)
  # takes precedence over an entry in the rule table. With @wrap set, a
  # non-empty result is wrapped as [{ref => result}].
  #
  # Raises RuntimeError when neither a method nor a rule exists.
  def match_ref ref
    trace "Try #{ref}" if @debug
    # Decide up front whether a rule method exists. Rescuing NameError around
    # the call also swallowed NameError/NoMethodError raised *inside* the
    # rule method and misreported them as a missing rule.
    if respond_to?(ref, true)
      m = send(ref)
    elsif @rules[ref]
      m = match @rules[ref]
    else
      fail "No rule defined for '#{ref}'"
    end
    if m
      m = (@wrap and not m.empty?) ? [{ref => m}] : m
      trace "Got #{ref}" if @debug
    else
      trace "Not #{ref}" if @debug
    end
    return m
  end

  # Match regex against the input at the current position. On success the
  # position advances past the match and the captures are returned (nested
  # in an Array when there is more than one capture group); otherwise nil.
  def match_rgx regex
    m = @input[@pos..-1].match(regex)
    return unless m
    @pos += m[0].length
    match = m.captures
    # XXX not sure about this:
    match = [ match ] if m.length > 2
    @far = @pos if @pos > @far
    return match
  end

  #----------------------------------------------------------------------------
  # Debugging and error reporting support methods
  #----------------------------------------------------------------------------

  # Write one trace line to $stderr. "Try ..." lines increase the nesting
  # depth and show the upcoming input; other lines decrease it.
  def trace action
    indent = !!action.match(/^Try /)
    @indent ||= 0
    @indent -= 1 unless indent
    $stderr.print ' ' * @indent
    @indent += 1 if indent
    snippet = @input[@pos..-1]
    snippet = snippet[0..30] + '...' if snippet.length > 30
    snippet = snippet.gsub(/\n/, "\\n")
    $stderr.printf "%-30s", action
    $stderr.print indent ? " >#{snippet}<\n" : "\n"
  end

  # Raise PegexParseError describing the farthest position reached.
  def failure
    msg = "Parse failed for some reason"
    raise PegexParseError, format_error(msg)
  end

  class PegexParseError < RuntimeError;end

  # Build the multi-line error report for msg, based on @far, the farthest
  # position recorded during matching.
  def format_error msg
    position = @far

    line = @input[0, position].scan(/\n/).size + 1
    column = position - (@input.rindex("\n", position) || -1)

    # Up to 50 characters on either side of the failure position; the text
    # before it is cut back to the start of the current line.
    start = position < 50 ? 0 : position - 50
    pretext = @input[start, position - start].gsub(/.*\n/m, '')
    context = @input[position, 50].gsub(/\n/, "\\n")

    # The caret offset is derived from the label width so that it always
    # points at the first character of the context text.
    label = 'context: '
    return [
      "Error parsing Pegex document:",
      "message: #{msg}",
      "line: #{line}",
      "column: #{column}",
      "position: #{position}",
      "#{label}#{pretext}#{context}",
      "#{' ' * (label.length + pretext.length)}^",
    ].join("\n") + "\n"
  end
end
data/test/address.rb ADDED
@@ -0,0 +1,56 @@
1
+ require 'test/unit'
2
+ require 'peglite'
3
+
4
+ $address1 = <<'...'
5
+ John Doe
6
+ 123 Main St
7
+ Los Angeles, CA 90009
8
+ ...
9
+
10
+ $parse_plain = <<'...'
11
+ ---
12
+ - John Doe
13
+ - 123 Main St
14
+ - - Los Angeles
15
+ - CA
16
+ - '90009'
17
+ ...
18
+
19
+ $parse_wrap = <<'...'
20
+ ---
21
+ address:
22
+ - - name:
23
+ - John Doe
24
+ - street:
25
+ - 123 Main St
26
+ - place:
27
+ - - city:
28
+ - Los Angeles
29
+ - state:
30
+ - CA
31
+ - zip:
32
+ - '90009'
33
+ ...
34
+
35
# Checks AddressParser against the $address1 fixture, once with plain output
# and once with wrap: true.
#
# This is a dedicated subclass of Test::Unit::TestCase. Reopening
# Test::Unit::TestCase itself would add these tests to every other test case
# in the process, because they all inherit from it.
class AddressTest < Test::Unit::TestCase
  def test_plain
    parser = AddressParser.new
    result = parser.parse $address1
    # assert_equal takes the expected value first, so failure messages label
    # the two sides correctly.
    assert_equal $parse_plain, YAML.dump(result), "Plain parse works"
  end

  def test_wrap
    parser = AddressParser.new wrap: true
    result = parser.parse $address1
    assert_equal $parse_wrap, YAML.dump(result), "Wrapping parse works"
  end
end
47
+
48
# Grammar for a three-line postal address: a name line, a street line and a
# "City, ST 12345" line. `address` is declared first, which makes it the
# default top rule.
class AddressParser < PegLite
  rule :address => 'name street place'
  rule :name => /\A(.*?)\n/
  rule :street => /\A(.*?)\n/
  rule :place => 'city COMMA _ state __ zip NL'
  rule :city => /\A(\w+(?: \w+)?)/
  rule :state => /\A(WA|OR|CA)/ # Left Coast Rulez
  rule :zip => /\A(\d{5})/
end
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: peglite
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ingy döt Net
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-02 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: ! 'PegLite is a very simple framework for creating your own PEG parsers.
15
+
16
+ '
17
+ email: ingy@ingy.net
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - .gemspec
23
+ - CHANGELOG.yaml
24
+ - Gemfile
25
+ - LICENSE
26
+ - README.rdoc
27
+ - Rakefile
28
+ - lib/peglite.rb
29
+ - lib/peglite/compiler.rb
30
+ - test/address.rb
31
+ homepage: http://pegex.org
32
+ licenses:
33
+ - MIT
34
+ post_install_message:
35
+ rdoc_options: []
36
+ require_paths:
37
+ - lib
38
+ required_ruby_version: !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: 1.9.1
44
+ required_rubygems_version: !ruby/object:Gem::Requirement
45
+ none: false
46
+ requirements:
47
+ - - ! '>='
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ requirements: []
51
+ rubyforge_project:
52
+ rubygems_version: 1.8.23
53
+ signing_key:
54
+ specification_version: 3
55
+ summary: Simple PEG Parsing Framework
56
+ test_files: []