rdf-turtle 1.0.0 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
metadata CHANGED
@@ -1,174 +1,152 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rdf-turtle
3
- version: !ruby/object:Gem::Version
4
- hash: 23
5
- prerelease:
6
- segments:
7
- - 1
8
- - 0
9
- - 0
10
- version: 1.0.0
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.2
11
5
  platform: ruby
12
- authors:
6
+ authors:
13
7
  - Gregg Kellogg
14
8
  autorequire:
15
9
  bindir: bin
16
10
  cert_chain: []
17
-
18
- date: 2013-01-19 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
11
+ date: 2013-03-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
21
14
  name: rdf
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
22
21
  prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 15
29
- segments:
30
- - 1
31
- - 0
32
- version: "1.0"
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: ebnf
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.2.1
33
34
  type: :runtime
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
36
- name: open-uri-cached
37
35
  prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
39
- none: false
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- hash: 21
44
- segments:
45
- - 0
46
- - 0
47
- - 5
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.2.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: open-uri-cached
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
48
47
  version: 0.0.5
49
48
  type: :development
50
- version_requirements: *id002
51
- - !ruby/object:Gem::Dependency
52
- name: rspec
53
49
  prerelease: false
54
- requirement: &id003 !ruby/object:Gem::Requirement
55
- none: false
56
- requirements:
57
- - - ">="
58
- - !ruby/object:Gem::Version
59
- hash: 63
60
- segments:
61
- - 2
62
- - 12
63
- - 0
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.0.5
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
64
61
  version: 2.12.0
65
62
  type: :development
66
- version_requirements: *id003
67
- - !ruby/object:Gem::Dependency
68
- name: rdf-isomorphic
69
63
  prerelease: false
70
- requirement: &id004 !ruby/object:Gem::Requirement
71
- none: false
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- hash: 3
76
- segments:
77
- - 0
78
- version: "0"
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: 2.12.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: rdf-isomorphic
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
79
76
  type: :development
80
- version_requirements: *id004
81
- - !ruby/object:Gem::Dependency
82
- name: json-ld
83
77
  prerelease: false
84
- requirement: &id005 !ruby/object:Gem::Requirement
85
- none: false
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- hash: 3
90
- segments:
91
- - 0
92
- version: "0"
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: json-ld
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
93
90
  type: :development
94
- version_requirements: *id005
95
- - !ruby/object:Gem::Dependency
96
- name: yard
97
91
  prerelease: false
98
- requirement: &id006 !ruby/object:Gem::Requirement
99
- none: false
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- hash: 57
104
- segments:
105
- - 0
106
- - 8
107
- - 3
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: yard
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
108
103
  version: 0.8.3
109
104
  type: :development
110
- version_requirements: *id006
111
- - !ruby/object:Gem::Dependency
112
- name: sxp
113
105
  prerelease: false
114
- requirement: &id007 !ruby/object:Gem::Requirement
115
- none: false
116
- requirements:
117
- - - ">="
118
- - !ruby/object:Gem::Version
119
- hash: 3
120
- segments:
121
- - 0
122
- version: "0"
123
- type: :development
124
- version_requirements: *id007
125
- - !ruby/object:Gem::Dependency
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: 0.8.3
111
+ - !ruby/object:Gem::Dependency
126
112
  name: rdf-spec
127
- prerelease: false
128
- requirement: &id008 !ruby/object:Gem::Requirement
129
- none: false
130
- requirements:
131
- - - ">="
132
- - !ruby/object:Gem::Version
133
- hash: 15
134
- segments:
135
- - 1
136
- - 0
137
- version: "1.0"
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '1.0'
138
118
  type: :development
139
- version_requirements: *id008
140
- - !ruby/object:Gem::Dependency
141
- name: rake
142
119
  prerelease: false
143
- requirement: &id009 !ruby/object:Gem::Requirement
144
- none: false
145
- requirements:
146
- - - ">="
147
- - !ruby/object:Gem::Version
148
- hash: 3
149
- segments:
150
- - 0
151
- version: "0"
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ! '>='
123
+ - !ruby/object:Gem::Version
124
+ version: '1.0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rake
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
152
132
  type: :development
153
- version_requirements: *id009
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ! '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
154
139
  description: RDF::Turtle is an Turtle reader/writer for the RDF.rb library suite.
155
140
  email: public-rdf-ruby@w3.org
156
141
  executables: []
157
-
158
142
  extensions: []
159
-
160
143
  extra_rdoc_files: []
161
-
162
- files:
144
+ files:
163
145
  - AUTHORS
164
- - README.markdown
146
+ - README.md
165
147
  - History
166
148
  - UNLICENSE
167
149
  - VERSION
168
- - lib/ebnf.rb
169
- - lib/rdf/ll1/lexer.rb
170
- - lib/rdf/ll1/parser.rb
171
- - lib/rdf/ll1/scanner.rb
172
150
  - lib/rdf/turtle/format.rb
173
151
  - lib/rdf/turtle/meta.rb
174
152
  - lib/rdf/turtle/reader.rb
@@ -177,40 +155,28 @@ files:
177
155
  - lib/rdf/turtle/writer.rb
178
156
  - lib/rdf/turtle.rb
179
157
  homepage: http://github.com/ruby-rdf/rdf-turtle
180
- licenses:
158
+ licenses:
181
159
  - Public Domain
160
+ metadata: {}
182
161
  post_install_message:
183
162
  rdoc_options: []
184
-
185
- require_paths:
163
+ require_paths:
186
164
  - lib
187
- required_ruby_version: !ruby/object:Gem::Requirement
188
- none: false
189
- requirements:
190
- - - ">="
191
- - !ruby/object:Gem::Version
192
- hash: 53
193
- segments:
194
- - 1
195
- - 8
196
- - 1
165
+ required_ruby_version: !ruby/object:Gem::Requirement
166
+ requirements:
167
+ - - ! '>='
168
+ - !ruby/object:Gem::Version
197
169
  version: 1.8.1
198
- required_rubygems_version: !ruby/object:Gem::Requirement
199
- none: false
200
- requirements:
201
- - - ">="
202
- - !ruby/object:Gem::Version
203
- hash: 3
204
- segments:
205
- - 0
206
- version: "0"
170
+ required_rubygems_version: !ruby/object:Gem::Requirement
171
+ requirements:
172
+ - - ! '>='
173
+ - !ruby/object:Gem::Version
174
+ version: '0'
207
175
  requirements: []
208
-
209
176
  rubyforge_project: rdf-turtle
210
- rubygems_version: 1.8.24
177
+ rubygems_version: 2.0.3
211
178
  signing_key:
212
- specification_version: 3
179
+ specification_version: 4
213
180
  summary: Turtle reader/writer for Ruby.
214
181
  test_files: []
215
-
216
182
  has_rdoc: false
@@ -1,638 +0,0 @@
1
- require 'strscan'
2
-
3
- # Extended Backus-Naur Form (EBNF), being the W3C variation is
4
- # originally defined in the
5
- # [W3C XML 1.0 Spec](http://www.w3.org/TR/REC-xml/#sec-notation).
6
- #
7
- # This version attempts to be less strict than the strict definition
8
- # to allow for colloquial variations (such as in the Turtle syntax).
9
- #
10
- # A rule takes the following form:
11
- # \[1\] symbol ::= expression
12
- #
13
- # Comments include the content between '/*' and '*/'
14
- #
15
- # @see http://www.w3.org/2000/10/swap/grammar/ebnf2turtle.py
16
- # @see http://www.w3.org/2000/10/swap/grammar/ebnf2bnf.n3
17
- #
18
- # Based on bnf2turtle by Dan Connolly.
19
- #
20
- # Motivation
21
- # ----------
22
- #
23
- # Many specifications include grammars that look formal but are not
24
- # actually checked, by machine, against test data sets. Debugging the
25
- # grammar in the XML specification has been a long, tedious manual
26
- # process. Only when the loop is closed between a fully formal grammar
27
- # and a large test data set can we be confident that we have an accurate
28
- # specification of a language (and even then, only the syntax of the language).
29
- #
30
- #
31
- # The grammar in the [N3 design note][] has evolved based on the original
32
- # manual transcription into a python recursive-descent parser and
33
- # subsequent development of test cases. Rather than maintain the grammar
34
- # and the parser independently, our [goal] is to formalize the language
35
- # syntax sufficiently to replace the manual implementation with one
36
- # derived mechanically from the specification.
37
- #
38
- #
39
- # [N3 design note]: http://www.w3.org/DesignIssues/Notation3
40
- #
41
- # Related Work
42
- # ------------
43
- #
44
- # Sean Palmer's [n3p announcement][] demonstrated the feasibility of the
45
- # approach, though that work did not cover some aspects of N3.
46
- #
47
- # In development of the [SPARQL specification][], Eric Prud'hommeaux
48
- # developed [Yacker][], which converts EBNF syntax to perl and C and C++
49
- # yacc grammars. It includes an interactive facility for checking
50
- # strings against the resulting grammars.
51
- # Yosi Scharf used it in [cwm Release 1.1.0rc1][], which includes
52
- # a SPARQL parser that is *almost* completely mechanically generated.
53
- #
54
- # The N3/turtle output from yacker is lower level than the EBNF notation
55
- # from the XML specification; it has the ?, +, and * operators compiled
56
- # down to pure context-free rules, obscuring the grammar
57
- # structure. Since that transformation is straightforwardly expressed in
58
- # semantic web rules (see [bnf-rules.n3][]), it seems best to keep the RDF
59
- # expression of the grammar in terms of the higher level EBNF
60
- # constructs.
61
- #
62
- # [goal]: http://www.w3.org/2002/02/mid/1086902566.21030.1479.camel@dirk;list=public-cwm-bugs
63
- # [n3p announcement]: http://lists.w3.org/Archives/Public/public-cwm-talk/2004OctDec/0029.html
64
- # [Yacker]: http://www.w3.org/1999/02/26-modules/User/Yacker
65
- # [SPARQL specification]: http://www.w3.org/TR/rdf-sparql-query/
66
- # [Cwm Release 1.1.0rc1]: http://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html
67
- # [bnf-rules.n3]: http://www.w3.org/2000/10/swap/grammar/bnf-rules.n3
68
- #
69
- # Open Issues and Future Work
70
- # ---------------------------
71
- #
72
- # The yacker output also has the terminals compiled to elaborate regular
73
- # expressions. The best strategy for dealing with lexical tokens is not
74
- # yet clear. Many tokens in SPARQL are case insensitive; this is not yet
75
- # captured formally.
76
- #
77
- # The schema for the EBNF vocabulary used here (``g:seq``, ``g:alt``, ...)
78
- # is not yet published; it should be aligned with [swap/grammar/bnf][]
79
- # and the [bnf2html.n3][] rules (and/or the style of linked XHTML grammar
80
- # in the SPARQL and XML specifications).
81
- #
82
- # It would be interesting to corroborate the claim in the SPARQL spec
83
- # that the grammar is LL(1) with a mechanical proof based on N3 rules.
84
- #
85
- # [swap/grammar/bnf]: http://www.w3.org/2000/10/swap/grammar/bnf
86
- # [bnf2html.n3]: http://www.w3.org/2000/10/swap/grammar/bnf2html.n3
87
- #
88
- # Background
89
- # ----------
90
- #
91
- # The [N3 Primer] by Tim Berners-Lee introduces RDF and the Semantic
92
- # web using N3, a teaching and scribbling language. Turtle is a subset
93
- # of N3 that maps directly to (and from) the standard XML syntax for
94
- # RDF.
95
- #
96
- # [N3 Primer]: http://www.w3.org/2000/10/swap/Primer.html
97
- #
98
- # @author Gregg Kellogg
99
- class EBNF
100
- class Rule
101
- # @!attribute [r] sym for rule
102
- # @return [Symbol]
103
- attr_reader :sym
104
-
105
- # @!attribute [r] id of rule
106
- # @return [String]
107
- attr_reader :id
108
-
109
- # @!attribute [rw] kind of rule
110
- # @return [:rule, :token, or :pass]
111
- attr_accessor :kind
112
-
113
- # @!attribute [r] expr rule expression
114
- # @return [Array]
115
- attr_reader :expr
116
-
117
- # @!attribute [rw] orig original rule
118
- # @return [String]
119
- attr_accessor :orig
120
-
121
- # @param [Integer] id
122
- # @param [Symbol] sym
123
- # @param [Array] expr
124
- # @param [EBNF] ebnf
125
- def initialize(id, sym, expr, ebnf)
126
- @id, @sym, @expr, @ebnf = id, sym, expr, ebnf
127
- end
128
-
129
- # Serializes this rule to an S-Expression
130
- # @return [String]
131
- def to_sxp
132
- [id, sym, kind, expr].to_sxp
133
- end
134
-
135
- # Serializes this rule to Turtle
136
- # @return [String]
137
- def to_ttl
138
- @ebnf.debug("to_ttl") {inspect}
139
- comment = orig.strip.
140
- gsub(/"""/, '\"\"\"').
141
- gsub("\\", "\\\\").
142
- sub(/^\"/, '\"').
143
- sub(/\"$/m, '\"')
144
- statements = [
145
- %{:#{id} rdfs:label "#{id}"; rdf:value "#{sym}";},
146
- %{ rdfs:comment #{comment.inspect};},
147
- ]
148
-
149
- statements += ttl_expr(expr, kind == :token ? "re" : "g", 1, false)
150
- "\n" + statements.join("\n")
151
- end
152
-
153
- def inspect
154
- {:sym => sym, :id => id, kind => kind, :expr => expr}.inspect
155
- end
156
-
157
- private
158
- def ttl_expr(expr, pfx, depth, is_obj = true)
159
- indent = ' ' * depth
160
- @ebnf.debug("ttl_expr", :depth => depth) {expr.inspect}
161
- op = expr.shift if expr.is_a?(Array)
162
- statements = []
163
-
164
- if is_obj
165
- bra, ket = "[ ", " ]"
166
- else
167
- bra = ket = ''
168
- end
169
-
170
- case op
171
- when :seq, :alt, :diff
172
- statements << %{#{indent}#{bra}#{pfx}:#{op} (}
173
- expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
174
- statements << %{#{indent} )#{ket}}
175
- when :opt, :plus, :star
176
- statements << %{#{indent}#{bra}#{pfx}:#{op} }
177
- statements += ttl_expr(expr.first, pfx, depth + 1)
178
- statements << %{#{indent} #{ket}} unless ket.empty?
179
- when :"'"
180
- statements << %{#{indent}"#{esc(expr)}"}
181
- when :range
182
- statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
183
- when :hex
184
- raise "didn't expect \" in expr" if expr.include?(:'"')
185
- statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
186
- else
187
- if is_obj
188
- statements << %{#{indent}#{expr.inspect}}
189
- else
190
- statements << %{#{indent}g:seq ( #{expr.inspect} )}
191
- end
192
- end
193
-
194
- statements.last << " ." unless is_obj
195
- @ebnf.debug("statements", :depth => depth) {statements.join("\n")}
196
- statements
197
- end
198
-
199
- ##
200
- # turn an XML BNF character class into an N3 literal for that
201
- # character class (less the outer quote marks)
202
- #
203
- # >>> cclass("^<>'{}|^`")
204
- # "[^<>'{}|^`]"
205
- # >>> cclass("#x0300-#x036F")
206
- # "[\\u0300-\\u036F]"
207
- # >>> cclass("#xC0-#xD6")
208
- # "[\\u00C0-\\u00D6]"
209
- # >>> cclass("#x370-#x37D")
210
- # "[\\u0370-\\u037D]"
211
- #
212
- # as in: ECHAR ::= '\' [tbnrf\"']
213
- # >>> cclass("tbnrf\\\"'")
214
- # 'tbnrf\\\\\\"\''
215
- #
216
- # >>> cclass("^#x22#x5C#x0A#x0D")
217
- # '^\\u0022\\\\\\u005C\\u000A\\u000D'
218
- def cclass(txt)
219
- '[' +
220
- txt.gsub(/\#x[0-9a-fA-F]+/) do |hx|
221
- hx = hx[2..-1]
222
- if hx.length <= 4
223
- "\\u#{'0' * (4 - hx.length)}#{hx}"
224
- elsif hx.length <= 8
225
- "\\U#{'0' * (8 - hx.length)}#{hx}"
226
- end
227
- end +
228
- ']'
229
- end
230
- end
231
-
232
- # Abstract syntax tree from parse
233
- attr_reader :ast
234
-
235
- # Parse the string or file input generating an abstract syntax tree
236
- # in S-Expressions (similar to SPARQL SSE)
237
- #
238
- # @param [#read, #to_s] input
239
- # @param [Hash{Symbol => Object}] options
240
- # @option options [Boolean, Array] :debug
241
- # Output debug information to an array or STDOUT.
242
- def initialize(input, options = {})
243
- @options = options
244
- @lineno, @depth = 1, 0
245
- token = false
246
- @ast = []
247
-
248
- input = input.respond_to?(:read) ? input.read : input.to_s
249
- scanner = StringScanner.new(input)
250
-
251
- eachRule(scanner) do |r|
252
- debug("rule string") {r.inspect}
253
- case r
254
- when /^@terminals/
255
- # Switch mode to parsing tokens
256
- token = true
257
- when /^@pass\s*(.*)$/m
258
- rule = depth {ruleParts("[0] " + r)}
259
- rule.kind = :pass
260
- rule.orig = r
261
- @ast << rule
262
- else
263
- rule = depth {ruleParts(r)}
264
-
265
- # all caps symbols are tokens. Once a token is seen
266
- # we don't go back
267
- token ||= !!(rule.sym.to_s =~ /^[A-Z_]+$/)
268
- rule.kind = token ? :token : :rule
269
- rule.orig = r
270
- @ast << rule
271
- end
272
- end
273
- end
274
-
275
- ##
276
- # Write out parsed syntax string as an S-Expression
277
- # @return [String]
278
- def to_sxp
279
- begin
280
- require 'sxp'
281
- SXP::Generator.string(ast)
282
- rescue LoadError
283
- ast.to_sxp
284
- end
285
- end
286
-
287
- ##
288
- # Write out syntax tree as Turtle
289
- # @param [String] prefix for language
290
- # @param [String] ns URI for language
291
- # @return [String]
292
- def to_ttl(prefix, ns)
293
- token = false
294
-
295
- unless ast.empty?
296
- [
297
- "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.",
298
- "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.",
299
- "@prefix #{prefix}: <#{ns}>.",
300
- "@prefix : <#{ns}>.",
301
- "@prefix re: <http://www.w3.org/2000/10/swap/grammar/regex#>.",
302
- "@prefix g: <http://www.w3.org/2000/10/swap/grammar/ebnf#>.",
303
- "",
304
- ":language rdfs:isDefinedBy <>; g:start :#{ast.first.id}.",
305
- "",
306
- ]
307
- end.join("\n") +
308
-
309
- ast.
310
- select {|a| [:rule, :token].include?(a.kind)}.
311
- map(&:to_ttl).
312
- join("\n")
313
- end
314
-
315
- ##
316
- # Iterate over rule strings.
317
- # a line that starts with '\[' or '@' starts a new rule
318
- #
319
- # @param [StringScanner] scanner
320
- # @yield rule_string
321
- # @yieldparam [String] rule_string
322
- def eachRule(scanner)
323
- cur_lineno = 1
324
- r = ''
325
- until scanner.eos?
326
- case
327
- when s = scanner.scan(%r(\s+)m)
328
- # Eat whitespace
329
- cur_lineno += s.count("\n")
330
- #debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" }
331
- when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m)
332
- # Eat comments
333
- cur_lineno += s.count("\n")
334
- debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
335
- when s = scanner.scan(%r(^@terminals))
336
- #debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" }
337
- yield(r) unless r.empty?
338
- @lineno = cur_lineno
339
- yield(s)
340
- r = ''
341
- when s = scanner.scan(/@pass/)
342
- # Found rule start, if we've already collected a rule, yield it
343
- #debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" }
344
- yield r unless r.empty?
345
- @lineno = cur_lineno
346
- r = s
347
- when s = scanner.scan(/\[(?=\w+\])/)
348
- # Found rule start, if we've already collected a rule, yield it
349
- yield r unless r.empty?
350
- #debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" }
351
- @lineno = cur_lineno
352
- r = s
353
- else
354
- # Collect until end of line, or start of comment
355
- s = scanner.scan_until(%r((?:/\*)|$)m)
356
- cur_lineno += s.count("\n")
357
- #debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" }
358
- r += s
359
- end
360
- end
361
- yield r unless r.empty?
362
- end
363
-
364
- ##
365
- # Parse a rule into a rule number, a symbol and an expression
366
- #
367
- # @param [String] rule
368
- # @return [Rule]
369
- def ruleParts(rule)
370
- num_sym, expr = rule.split('::=', 2).map(&:strip)
371
- num, sym = num_sym.split(']', 2).map(&:strip)
372
- num = num[1..-1]
373
- r = Rule.new(sym && sym.to_sym, num, ebnf(expr).first, self)
374
- debug("ruleParts") { r.inspect }
375
- r
376
- end
377
-
378
- ##
379
- # Parse a string into an expression tree and a remaining string
380
- #
381
- # @example
382
- # >>> ebnf("a b c")
383
- # ((seq, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
384
- #
385
- # >>> ebnf("a? b+ c*")
386
- # ((seq, \[(opt, ('id', 'a')), (plus, ('id', 'b')), ('*', ('id', 'c'))\]), '')
387
- #
388
- # >>> ebnf(" | x xlist")
389
- # ((alt, \[(seq, \[\]), (seq, \[('id', 'x'), ('id', 'xlist')\])\]), '')
390
- #
391
- # >>> ebnf("a | (b - c)")
392
- # ((alt, \[('id', 'a'), (diff, \[('id', 'b'), ('id', 'c')\])\]), '')
393
- #
394
- # >>> ebnf("a b | c d")
395
- # ((alt, \[(seq, \[('id', 'a'), ('id', 'b')\]), (seq, \[('id', 'c'), ('id', 'd')\])\]), '')
396
- #
397
- # >>> ebnf("a | b | c")
398
- # ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
399
- #
400
- # >>> ebnf("a) b c")
401
- # (('id', 'a'), ' b c')
402
- #
403
- # >>> ebnf("BaseDecl? PrefixDecl*")
404
- # ((seq, \[(opt, ('id', 'BaseDecl')), ('*', ('id', 'PrefixDecl'))\]), '')
405
- #
406
- # >>> ebnf("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
407
- # ((alt, \[('id', 'NCCHAR1'), ("'", diff), (range, '0-9'), (hex, '#x00B7'), (range, '#x0300-#x036F'), (range, '#x203F-#x2040')\]), '')
408
- #
409
- # @param [String] s
410
- # @return [Array]
411
- def ebnf(s)
412
- debug("ebnf") {"(#{s.inspect})"}
413
- e, s = depth {alt(s)}
414
- debug {"=> alt returned #{[e, s].inspect}"}
415
- unless s.empty?
416
- t, ss = depth {token(s)}
417
- debug {"=> token returned #{[t, ss].inspect}"}
418
- return [e, ss] if t.is_a?(Array) && t.first == :")"
419
- end
420
- [e, s]
421
- end
422
-
423
- ##
424
- # Parse alt
425
- # >>> alt("a | b | c")
426
- # ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
427
- # @param [String] s
428
- # @return [Array]
429
- def alt(s)
430
- debug("alt") {"(#{s.inspect})"}
431
- args = []
432
- while !s.empty?
433
- e, s = depth {seq(s)}
434
- debug {"=> seq returned #{[e, s].inspect}"}
435
- if e.to_s.empty?
436
- break unless args.empty?
437
- e = [:seq, []] # empty sequence
438
- end
439
- args << e
440
- unless s.empty?
441
- t, ss = depth {token(s)}
442
- break unless t[0] == :alt
443
- s = ss
444
- end
445
- end
446
- args.length > 1 ? [args.unshift(:alt), s] : [e, s]
447
- end
448
-
449
- ##
450
- # parse seq
451
- #
452
- # >>> seq("a b c")
453
- # ((seq, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
454
- #
455
- # >>> seq("a b? c")
456
- # ((seq, \[('id', 'a'), (opt, ('id', 'b')), ('id', 'c')\]), '')
457
- def seq(s)
458
- debug("seq") {"(#{s.inspect})"}
459
- args = []
460
- while !s.empty?
461
- e, ss = depth {diff(s)}
462
- debug {"=> diff returned #{[e, ss].inspect}"}
463
- unless e.to_s.empty?
464
- args << e
465
- s = ss
466
- else
467
- break;
468
- end
469
- end
470
- if args.length > 1
471
- [args.unshift(:seq), s]
472
- elsif args.length == 1
473
- args + [s]
474
- else
475
- ["", s]
476
- end
477
- end
478
-
479
- ##
480
- # parse diff
481
- #
482
- # >>> diff("a - b")
483
- # ((diff, \[('id', 'a'), ('id', 'b')\]), '')
484
- def diff(s)
485
- debug("diff") {"(#{s.inspect})"}
486
- e1, s = depth {postfix(s)}
487
- debug {"=> postfix returned #{[e1, s].inspect}"}
488
- unless e1.to_s.empty?
489
- unless s.empty?
490
- t, ss = depth {token(s)}
491
- debug {"diff #{[t, ss].inspect}"}
492
- if t.is_a?(Array) && t.first == :diff
493
- s = ss
494
- e2, s = primary(s)
495
- unless e2.to_s.empty?
496
- return [[:diff, e1, e2], s]
497
- else
498
- raise "Syntax Error"
499
- end
500
- end
501
- end
502
- end
503
- [e1, s]
504
- end
505
-
506
- ##
507
- # parse postfix
508
- #
509
- # >>> postfix("a b c")
510
- # (('id', 'a'), ' b c')
511
- #
512
- # >>> postfix("a? b c")
513
- # ((opt, ('id', 'a')), ' b c')
514
- def postfix(s)
515
- debug("postfix") {"(#{s.inspect})"}
516
- e, s = depth {primary(s)}
517
- debug {"=> primary returned #{[e, s].inspect}"}
518
- return ["", s] if e.to_s.empty?
519
- if !s.empty?
520
- t, ss = depth {token(s)}
521
- debug {"=> #{[t, ss].inspect}"}
522
- if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first)
523
- return [[t.first, e], ss]
524
- end
525
- end
526
- [e, s]
527
- end
528
-
529
- ##
530
- # parse primary
531
- #
532
- # >>> primary("a b c")
533
- # (('id', 'a'), ' b c')
534
- def primary(s)
535
- debug("primary") {"(#{s.inspect})"}
536
- t, s = depth {token(s)}
537
- debug {"=> token returned #{[t, s].inspect}"}
538
- if t.is_a?(Symbol) || t.is_a?(String)
539
- [t, s]
540
- elsif %w(range hex).map(&:to_sym).include?(t.first)
541
- [t, s]
542
- elsif t.first == :"("
543
- e, s = depth {ebnf(s)}
544
- debug {"=> ebnf returned #{[e, s].inspect}"}
545
- [e, s]
546
- else
547
- ["", s]
548
- end
549
- end
550
-
551
- ##
552
- # parse one token; return the token and the remaining string
553
- #
554
- # A token is represented as a tuple whose 1st item gives the type;
555
- # some types have additional info in the tuple.
556
- #
557
- # @example
558
- # >>> token("'abc' def")
559
- # (("'", 'abc'), ' def')
560
- #
561
- # >>> token("[0-9]")
562
- # ((range, '0-9'), '')
563
- # >>> token("#x00B7")
564
- # ((hex, '#x00B7'), '')
565
- # >>> token ("\[#x0300-#x036F\]")
566
- # ((range, '#x0300-#x036F'), '')
567
- # >>> token("\[^<>'{}|^`\]-\[#x00-#x20\]")
568
- # ((range, "^<>'{}|^`"), '-\[#x00-#x20\]')
569
- def token(s)
570
- s = s.strip
571
- case m = s[0,1]
572
- when '"', "'"
573
- l, s = s[1..-1].split(m, 2)
574
- [l, s]
575
- when '['
576
- l, s = s[1..-1].split(']', 2)
577
- [[:range, l], s]
578
- when '#'
579
- s.match(/(#\w+)(.*)$/)
580
- l, s = $1, $2
581
- [[:hex, l], s]
582
- when /[[:alpha:]]/
583
- s.match(/(\w+)(.*)$/)
584
- l, s = $1, $2
585
- [l.to_sym, s]
586
- when '@'
587
- s.match(/@(#\w+)(.*)$/)
588
- l, s = $1, $2
589
- [[:"@", l], s]
590
- when '-'
591
- [[:diff], s[1..-1]]
592
- when '?'
593
- [[:opt], s[1..-1]]
594
- when '|'
595
- [[:alt], s[1..-1]]
596
- when '+'
597
- [[:plus], s[1..-1]]
598
- when '*'
599
- [[:star], s[1..-1]]
600
- when /[\(\)]/
601
- [[m.to_sym], s[1..-1]]
602
- else
603
- raise "unrecognized token: #{s.inspect}"
604
- end
605
- end
606
-
607
- def depth
608
- @depth += 1
609
- ret = yield
610
- @depth -= 1
611
- ret
612
- end
613
-
614
- ##
615
- # Progress output when debugging
616
- #
617
- # @overload debug(node, message)
618
- # @param [String] node relative location in input
619
- # @param [String] message ("")
620
- #
621
- # @overload debug(message)
622
- # @param [String] message ("")
623
- #
624
- # @yieldreturn [String] added to message
625
- def debug(*args)
626
- return unless @options[:debug]
627
- options = args.last.is_a?(Hash) ? args.pop : {}
628
- depth = options[:depth] || @depth
629
- message = args.pop
630
- message = message.call if message.is_a?(Proc)
631
- args << message if message
632
- args << yield if block_given?
633
- message = "#{args.join(': ')}"
634
- str = "[#{@lineno}]#{' ' * depth}#{message}"
635
- @options[:debug] << str if @options[:debug].is_a?(Array)
636
- $stderr.puts(str) if @options[:debug] == true
637
- end
638
- end