rdf-turtle 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,174 +1,152 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rdf-turtle
3
- version: !ruby/object:Gem::Version
4
- hash: 23
5
- prerelease:
6
- segments:
7
- - 1
8
- - 0
9
- - 0
10
- version: 1.0.0
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.2
11
5
  platform: ruby
12
- authors:
6
+ authors:
13
7
  - Gregg Kellogg
14
8
  autorequire:
15
9
  bindir: bin
16
10
  cert_chain: []
17
-
18
- date: 2013-01-19 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
11
+ date: 2013-03-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
21
14
  name: rdf
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
22
21
  prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 15
29
- segments:
30
- - 1
31
- - 0
32
- version: "1.0"
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: ebnf
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.2.1
33
34
  type: :runtime
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
36
- name: open-uri-cached
37
35
  prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
39
- none: false
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- hash: 21
44
- segments:
45
- - 0
46
- - 0
47
- - 5
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.2.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: open-uri-cached
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
48
47
  version: 0.0.5
49
48
  type: :development
50
- version_requirements: *id002
51
- - !ruby/object:Gem::Dependency
52
- name: rspec
53
49
  prerelease: false
54
- requirement: &id003 !ruby/object:Gem::Requirement
55
- none: false
56
- requirements:
57
- - - ">="
58
- - !ruby/object:Gem::Version
59
- hash: 63
60
- segments:
61
- - 2
62
- - 12
63
- - 0
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.0.5
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
64
61
  version: 2.12.0
65
62
  type: :development
66
- version_requirements: *id003
67
- - !ruby/object:Gem::Dependency
68
- name: rdf-isomorphic
69
63
  prerelease: false
70
- requirement: &id004 !ruby/object:Gem::Requirement
71
- none: false
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- hash: 3
76
- segments:
77
- - 0
78
- version: "0"
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: 2.12.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: rdf-isomorphic
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
79
76
  type: :development
80
- version_requirements: *id004
81
- - !ruby/object:Gem::Dependency
82
- name: json-ld
83
77
  prerelease: false
84
- requirement: &id005 !ruby/object:Gem::Requirement
85
- none: false
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- hash: 3
90
- segments:
91
- - 0
92
- version: "0"
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: json-ld
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
93
90
  type: :development
94
- version_requirements: *id005
95
- - !ruby/object:Gem::Dependency
96
- name: yard
97
91
  prerelease: false
98
- requirement: &id006 !ruby/object:Gem::Requirement
99
- none: false
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- hash: 57
104
- segments:
105
- - 0
106
- - 8
107
- - 3
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: yard
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
108
103
  version: 0.8.3
109
104
  type: :development
110
- version_requirements: *id006
111
- - !ruby/object:Gem::Dependency
112
- name: sxp
113
105
  prerelease: false
114
- requirement: &id007 !ruby/object:Gem::Requirement
115
- none: false
116
- requirements:
117
- - - ">="
118
- - !ruby/object:Gem::Version
119
- hash: 3
120
- segments:
121
- - 0
122
- version: "0"
123
- type: :development
124
- version_requirements: *id007
125
- - !ruby/object:Gem::Dependency
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: 0.8.3
111
+ - !ruby/object:Gem::Dependency
126
112
  name: rdf-spec
127
- prerelease: false
128
- requirement: &id008 !ruby/object:Gem::Requirement
129
- none: false
130
- requirements:
131
- - - ">="
132
- - !ruby/object:Gem::Version
133
- hash: 15
134
- segments:
135
- - 1
136
- - 0
137
- version: "1.0"
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '1.0'
138
118
  type: :development
139
- version_requirements: *id008
140
- - !ruby/object:Gem::Dependency
141
- name: rake
142
119
  prerelease: false
143
- requirement: &id009 !ruby/object:Gem::Requirement
144
- none: false
145
- requirements:
146
- - - ">="
147
- - !ruby/object:Gem::Version
148
- hash: 3
149
- segments:
150
- - 0
151
- version: "0"
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ! '>='
123
+ - !ruby/object:Gem::Version
124
+ version: '1.0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rake
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
152
132
  type: :development
153
- version_requirements: *id009
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ! '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
154
139
  description: RDF::Turtle is an Turtle reader/writer for the RDF.rb library suite.
155
140
  email: public-rdf-ruby@w3.org
156
141
  executables: []
157
-
158
142
  extensions: []
159
-
160
143
  extra_rdoc_files: []
161
-
162
- files:
144
+ files:
163
145
  - AUTHORS
164
- - README.markdown
146
+ - README.md
165
147
  - History
166
148
  - UNLICENSE
167
149
  - VERSION
168
- - lib/ebnf.rb
169
- - lib/rdf/ll1/lexer.rb
170
- - lib/rdf/ll1/parser.rb
171
- - lib/rdf/ll1/scanner.rb
172
150
  - lib/rdf/turtle/format.rb
173
151
  - lib/rdf/turtle/meta.rb
174
152
  - lib/rdf/turtle/reader.rb
@@ -177,40 +155,28 @@ files:
177
155
  - lib/rdf/turtle/writer.rb
178
156
  - lib/rdf/turtle.rb
179
157
  homepage: http://github.com/ruby-rdf/rdf-turtle
180
- licenses:
158
+ licenses:
181
159
  - Public Domain
160
+ metadata: {}
182
161
  post_install_message:
183
162
  rdoc_options: []
184
-
185
- require_paths:
163
+ require_paths:
186
164
  - lib
187
- required_ruby_version: !ruby/object:Gem::Requirement
188
- none: false
189
- requirements:
190
- - - ">="
191
- - !ruby/object:Gem::Version
192
- hash: 53
193
- segments:
194
- - 1
195
- - 8
196
- - 1
165
+ required_ruby_version: !ruby/object:Gem::Requirement
166
+ requirements:
167
+ - - ! '>='
168
+ - !ruby/object:Gem::Version
197
169
  version: 1.8.1
198
- required_rubygems_version: !ruby/object:Gem::Requirement
199
- none: false
200
- requirements:
201
- - - ">="
202
- - !ruby/object:Gem::Version
203
- hash: 3
204
- segments:
205
- - 0
206
- version: "0"
170
+ required_rubygems_version: !ruby/object:Gem::Requirement
171
+ requirements:
172
+ - - ! '>='
173
+ - !ruby/object:Gem::Version
174
+ version: '0'
207
175
  requirements: []
208
-
209
176
  rubyforge_project: rdf-turtle
210
- rubygems_version: 1.8.24
177
+ rubygems_version: 2.0.3
211
178
  signing_key:
212
- specification_version: 3
179
+ specification_version: 4
213
180
  summary: Turtle reader/writer for Ruby.
214
181
  test_files: []
215
-
216
182
  has_rdoc: false
@@ -1,638 +0,0 @@
1
- require 'strscan'
2
-
3
- # Extended Bakus-Nour Form (EBNF), being the W3C variation is
4
- # originaly defined in the
5
- # [W3C XML 1.0 Spec](http://www.w3.org/TR/REC-xml/#sec-notation).
6
- #
7
- # This version attempts to be less strict than the strict definition
8
- # to allow for coloquial variations (such as in the Turtle syntax).
9
- #
10
- # A rule takes the following form:
11
- # \[1\] symbol ::= expression
12
- #
13
- # Comments include the content between '/*' and '*/'
14
- #
15
- # @see http://www.w3.org/2000/10/swap/grammar/ebnf2turtle.py
16
- # @see http://www.w3.org/2000/10/swap/grammar/ebnf2bnf.n3
17
- #
18
- # Based on bnf2turtle by Dan Connolly.
19
- #
20
- # Motivation
21
- # ----------
22
- #
23
- # Many specifications include grammars that look formal but are not
24
- # actually checked, by machine, against test data sets. Debugging the
25
- # grammar in the XML specification has been a long, tedious manual
26
- # process. Only when the loop is closed between a fully formal grammar
27
- # and a large test data set can we be confident that we have an accurate
28
- # specification of a language (and even then, only the syntax of the language).
29
- #
30
- #
31
- # The grammar in the [N3 design note][] has evolved based on the original
32
- # manual transcription into a python recursive-descent parser and
33
- # subsequent development of test cases. Rather than maintain the grammar
34
- # and the parser independently, our [goal] is to formalize the language
35
- # syntax sufficiently to replace the manual implementation with one
36
- # derived mechanically from the specification.
37
- #
38
- #
39
- # [N3 design note]: http://www.w3.org/DesignIssues/Notation3
40
- #
41
- # Related Work
42
- # ------------
43
- #
44
- # Sean Palmer's [n3p announcement][] demonstrated the feasibility of the
45
- # approach, though that work did not cover some aspects of N3.
46
- #
47
- # In development of the [SPARQL specification][], Eric Prud'hommeaux
48
- # developed [Yacker][], which converts EBNF syntax to perl and C and C++
49
- # yacc grammars. It includes an interactive facility for checking
50
- # strings against the resulting grammars.
51
- # Yosi Scharf used it in [cwm Release 1.1.0rc1][], which includes
52
- # a SPAQRL parser that is *almost* completely mechanically generated.
53
- #
54
- # The N3/turtle output from yacker is lower level than the EBNF notation
55
- # from the XML specification; it has the ?, +, and * operators compiled
56
- # down to pure context-free rules, obscuring the grammar
57
- # structure. Since that transformation is straightforwardly expressed in
58
- # semantic web rules (see [bnf-rules.n3][]), it seems best to keep the RDF
59
- # expression of the grammar in terms of the higher level EBNF
60
- # constructs.
61
- #
62
- # [goal]: http://www.w3.org/2002/02/mid/1086902566.21030.1479.camel@dirk;list=public-cwm-bugs
63
- # [n3p announcement]: http://lists.w3.org/Archives/Public/public-cwm-talk/2004OctDec/0029.html
64
- # [Yacker]: http://www.w3.org/1999/02/26-modules/User/Yacker
65
- # [SPARQL specification]: http://www.w3.org/TR/rdf-sparql-query/
66
- # [Cwm Release 1.1.0rc1]: http://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html
67
- # [bnf-rules.n3]: http://www.w3.org/2000/10/swap/grammar/bnf-rules.n3
68
- #
69
- # Open Issues and Future Work
70
- # ---------------------------
71
- #
72
- # The yacker output also has the terminals compiled to elaborate regular
73
- # expressions. The best strategy for dealing with lexical tokens is not
74
- # yet clear. Many tokens in SPARQL are case insensitive; this is not yet
75
- # captured formally.
76
- #
77
- # The schema for the EBNF vocabulary used here (``g:seq``, ``g:alt``, ...)
78
- # is not yet published; it should be aligned with [swap/grammar/bnf][]
79
- # and the [bnf2html.n3][] rules (and/or the style of linked XHTML grammar
80
- # in the SPARQL and XML specificiations).
81
- #
82
- # It would be interesting to corroborate the claim in the SPARQL spec
83
- # that the grammar is LL(1) with a mechanical proof based on N3 rules.
84
- #
85
- # [swap/grammar/bnf]: http://www.w3.org/2000/10/swap/grammar/bnf
86
- # [bnf2html.n3]: http://www.w3.org/2000/10/swap/grammar/bnf2html.n3
87
- #
88
- # Background
89
- # ----------
90
- #
91
- # The [N3 Primer] by Tim Berners-Lee introduces RDF and the Semantic
92
- # web using N3, a teaching and scribbling language. Turtle is a subset
93
- # of N3 that maps directly to (and from) the standard XML syntax for
94
- # RDF.
95
- #
96
- # [N3 Primer]: http://www.w3.org/2000/10/swap/Primer.html
97
- #
98
- # @author Gregg Kellogg
99
- class EBNF
100
- class Rule
101
- # @!attribute [r] sym for rule
102
- # @return [Symbol]
103
- attr_reader :sym
104
-
105
- # @!attribute [r] id of rule
106
- # @return [String]
107
- attr_reader :id
108
-
109
- # @!attribute [r] kind of rule
110
- # @return [:rule, :token, or :pass]
111
- attr_accessor :kind
112
-
113
- # @!attribute [r] expr rule expression
114
- # @return [Array]
115
- attr_reader :expr
116
-
117
- # @!attribute [r] orig original rule
118
- # @return [String]
119
- attr_accessor :orig
120
-
121
- # @param [Integer] id
122
- # @param [Symbol] sym
123
- # @param [Array] expr
124
- # @param [EBNF] ebnf
125
- def initialize(id, sym, expr, ebnf)
126
- @id, @sym, @expr, @ebnf = id, sym, expr, ebnf
127
- end
128
-
129
- # Serializes this rule to an S-Expression
130
- # @return [String]
131
- def to_sxp
132
- [id, sym, kind, expr].to_sxp
133
- end
134
-
135
- # Serializes this rule to an Turtle
136
- # @return [String]
137
- def to_ttl
138
- @ebnf.debug("to_ttl") {inspect}
139
- comment = orig.strip.
140
- gsub(/"""/, '\"\"\"').
141
- gsub("\\", "\\\\").
142
- sub(/^\"/, '\"').
143
- sub(/\"$/m, '\"')
144
- statements = [
145
- %{:#{id} rdfs:label "#{id}"; rdf:value "#{sym}";},
146
- %{ rdfs:comment #{comment.inspect};},
147
- ]
148
-
149
- statements += ttl_expr(expr, kind == :token ? "re" : "g", 1, false)
150
- "\n" + statements.join("\n")
151
- end
152
-
153
- def inspect
154
- {:sym => sym, :id => id, kind => kind, :expr => expr}.inspect
155
- end
156
-
157
- private
158
- def ttl_expr(expr, pfx, depth, is_obj = true)
159
- indent = ' ' * depth
160
- @ebnf.debug("ttl_expr", :depth => depth) {expr.inspect}
161
- op = expr.shift if expr.is_a?(Array)
162
- statements = []
163
-
164
- if is_obj
165
- bra, ket = "[ ", " ]"
166
- else
167
- bra = ket = ''
168
- end
169
-
170
- case op
171
- when :seq, :alt, :diff
172
- statements << %{#{indent}#{bra}#{pfx}:#{op} (}
173
- expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
174
- statements << %{#{indent} )#{ket}}
175
- when :opt, :plus, :star
176
- statements << %{#{indent}#{bra}#{pfx}:#{op} }
177
- statements += ttl_expr(expr.first, pfx, depth + 1)
178
- statements << %{#{indent} #{ket}} unless ket.empty?
179
- when :"'"
180
- statements << %{#{indent}"#{esc(expr)}"}
181
- when :range
182
- statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
183
- when :hex
184
- raise "didn't expect \" in expr" if expr.include?(:'"')
185
- statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
186
- else
187
- if is_obj
188
- statements << %{#{indent}#{expr.inspect}}
189
- else
190
- statements << %{#{indent}g:seq ( #{expr.inspect} )}
191
- end
192
- end
193
-
194
- statements.last << " ." unless is_obj
195
- @ebnf.debug("statements", :depth => depth) {statements.join("\n")}
196
- statements
197
- end
198
-
199
- ##
200
- # turn an XML BNF character class into an N3 literal for that
201
- # character class (less the outer quote marks)
202
- #
203
- # >>> cclass("^<>'{}|^`")
204
- # "[^<>'{}|^`]"
205
- # >>> cclass("#x0300-#x036F")
206
- # "[\\u0300-\\u036F]"
207
- # >>> cclass("#xC0-#xD6")
208
- # "[\\u00C0-\\u00D6]"
209
- # >>> cclass("#x370-#x37D")
210
- # "[\\u0370-\\u037D]"
211
- #
212
- # as in: ECHAR ::= '\' [tbnrf\"']
213
- # >>> cclass("tbnrf\\\"'")
214
- # 'tbnrf\\\\\\"\''
215
- #
216
- # >>> cclass("^#x22#x5C#x0A#x0D")
217
- # '^\\u0022\\\\\\u005C\\u000A\\u000D'
218
- def cclass(txt)
219
- '[' +
220
- txt.gsub(/\#x[0-9a-fA-F]+/) do |hx|
221
- hx = hx[2..-1]
222
- if hx.length <= 4
223
- "\\u#{'0' * (4 - hx.length)}#{hx}"
224
- elsif hx.length <= 8
225
- "\\U#{'0' * (8 - hx.length)}#{hx}"
226
- end
227
- end +
228
- ']'
229
- end
230
- end
231
-
232
- # Abstract syntax tree from parse
233
- attr_reader :ast
234
-
235
- # Parse the string or file input generating an abstract syntax tree
236
- # in S-Expressions (similar to SPARQL SSE)
237
- #
238
- # @param [#read, #to_s] input
239
- # @param [Hash{Symbol => Object}] options
240
- # @option options [Boolean, Array] :debug
241
- # Output debug information to an array or STDOUT.
242
- def initialize(input, options = {})
243
- @options = options
244
- @lineno, @depth = 1, 0
245
- token = false
246
- @ast = []
247
-
248
- input = input.respond_to?(:read) ? input.read : input.to_s
249
- scanner = StringScanner.new(input)
250
-
251
- eachRule(scanner) do |r|
252
- debug("rule string") {r.inspect}
253
- case r
254
- when /^@terminals/
255
- # Switch mode to parsing tokens
256
- token = true
257
- when /^@pass\s*(.*)$/m
258
- rule = depth {ruleParts("[0] " + r)}
259
- rule.kind = :pass
260
- rule.orig = r
261
- @ast << rule
262
- else
263
- rule = depth {ruleParts(r)}
264
-
265
- # all caps symbols are tokens. Once a token is seen
266
- # we don't go back
267
- token ||= !!(rule.sym.to_s =~ /^[A-Z_]+$/)
268
- rule.kind = token ? :token : :rule
269
- rule.orig = r
270
- @ast << rule
271
- end
272
- end
273
- end
274
-
275
- ##
276
- # Write out parsed syntax string as an S-Expression
277
- # @return [String]
278
- def to_sxp
279
- begin
280
- require 'sxp'
281
- SXP::Generator.string(ast)
282
- rescue LoadError
283
- ast.to_sxp
284
- end
285
- end
286
-
287
- ##
288
- # Write out syntax tree as Turtle
289
- # @param [String] prefix for language
290
- # @param [String] ns URI for language
291
- # @return [String]
292
- def to_ttl(prefix, ns)
293
- token = false
294
-
295
- unless ast.empty?
296
- [
297
- "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.",
298
- "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.",
299
- "@prefix #{prefix}: <#{ns}>.",
300
- "@prefix : <#{ns}>.",
301
- "@prefix re: <http://www.w3.org/2000/10/swap/grammar/regex#>.",
302
- "@prefix g: <http://www.w3.org/2000/10/swap/grammar/ebnf#>.",
303
- "",
304
- ":language rdfs:isDefinedBy <>; g:start :#{ast.first.id}.",
305
- "",
306
- ]
307
- end.join("\n") +
308
-
309
- ast.
310
- select {|a| [:rule, :token].include?(a.kind)}.
311
- map(&:to_ttl).
312
- join("\n")
313
- end
314
-
315
- ##
316
- # Iterate over rule strings.
317
- # a line that starts with '\[' or '@' starts a new rule
318
- #
319
- # @param [StringScanner] scanner
320
- # @yield rule_string
321
- # @yieldparam [String] rule_string
322
- def eachRule(scanner)
323
- cur_lineno = 1
324
- r = ''
325
- until scanner.eos?
326
- case
327
- when s = scanner.scan(%r(\s+)m)
328
- # Eat whitespace
329
- cur_lineno += s.count("\n")
330
- #debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" }
331
- when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m)
332
- # Eat comments
333
- cur_lineno += s.count("\n")
334
- debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
335
- when s = scanner.scan(%r(^@terminals))
336
- #debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" }
337
- yield(r) unless r.empty?
338
- @lineno = cur_lineno
339
- yield(s)
340
- r = ''
341
- when s = scanner.scan(/@pass/)
342
- # Found rule start, if we've already collected a rule, yield it
343
- #debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" }
344
- yield r unless r.empty?
345
- @lineno = cur_lineno
346
- r = s
347
- when s = scanner.scan(/\[(?=\w+\])/)
348
- # Found rule start, if we've already collected a rule, yield it
349
- yield r unless r.empty?
350
- #debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" }
351
- @lineno = cur_lineno
352
- r = s
353
- else
354
- # Collect until end of line, or start of comment
355
- s = scanner.scan_until(%r((?:/\*)|$)m)
356
- cur_lineno += s.count("\n")
357
- #debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" }
358
- r += s
359
- end
360
- end
361
- yield r unless r.empty?
362
- end
363
-
364
- ##
365
- # Parse a rule into a rule number, a symbol and an expression
366
- #
367
- # @param [String] rule
368
- # @return [Rule]
369
- def ruleParts(rule)
370
- num_sym, expr = rule.split('::=', 2).map(&:strip)
371
- num, sym = num_sym.split(']', 2).map(&:strip)
372
- num = num[1..-1]
373
- r = Rule.new(sym && sym.to_sym, num, ebnf(expr).first, self)
374
- debug("ruleParts") { r.inspect }
375
- r
376
- end
377
-
378
- ##
379
- # Parse a string into an expression tree and a remaining string
380
- #
381
- # @example
382
- # >>> ebnf("a b c")
383
- # ((seq, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
384
- #
385
- # >>> ebnf("a? b+ c*")
386
- # ((seq, \[(opt, ('id', 'a')), (plus, ('id', 'b')), ('*', ('id', 'c'))\]), '')
387
- #
388
- # >>> ebnf(" | x xlist")
389
- # ((alt, \[(seq, \[\]), (seq, \[('id', 'x'), ('id', 'xlist')\])\]), '')
390
- #
391
- # >>> ebnf("a | (b - c)")
392
- # ((alt, \[('id', 'a'), (diff, \[('id', 'b'), ('id', 'c')\])\]), '')
393
- #
394
- # >>> ebnf("a b | c d")
395
- # ((alt, \[(seq, \[('id', 'a'), ('id', 'b')\]), (seq, \[('id', 'c'), ('id', 'd')\])\]), '')
396
- #
397
- # >>> ebnf("a | b | c")
398
- # ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
399
- #
400
- # >>> ebnf("a) b c")
401
- # (('id', 'a'), ' b c')
402
- #
403
- # >>> ebnf("BaseDecl? PrefixDecl*")
404
- # ((seq, \[(opt, ('id', 'BaseDecl')), ('*', ('id', 'PrefixDecl'))\]), '')
405
- #
406
- # >>> ebnf("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
407
- # ((alt, \[('id', 'NCCHAR1'), ("'", diff), (range, '0-9'), (hex, '#x00B7'), (range, '#x0300-#x036F'), (range, '#x203F-#x2040')\]), '')
408
- #
409
- # @param [String] s
410
- # @return [Array]
411
- def ebnf(s)
412
- debug("ebnf") {"(#{s.inspect})"}
413
- e, s = depth {alt(s)}
414
- debug {"=> alt returned #{[e, s].inspect}"}
415
- unless s.empty?
416
- t, ss = depth {token(s)}
417
- debug {"=> token returned #{[t, ss].inspect}"}
418
- return [e, ss] if t.is_a?(Array) && t.first == :")"
419
- end
420
- [e, s]
421
- end
422
-
423
- ##
424
- # Parse alt
425
- # >>> alt("a | b | c")
426
- # ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
427
- # @param [String] s
428
- # @return [Array]
429
- def alt(s)
430
- debug("alt") {"(#{s.inspect})"}
431
- args = []
432
- while !s.empty?
433
- e, s = depth {seq(s)}
434
- debug {"=> seq returned #{[e, s].inspect}"}
435
- if e.to_s.empty?
436
- break unless args.empty?
437
- e = [:seq, []] # empty sequence
438
- end
439
- args << e
440
- unless s.empty?
441
- t, ss = depth {token(s)}
442
- break unless t[0] == :alt
443
- s = ss
444
- end
445
- end
446
- args.length > 1 ? [args.unshift(:alt), s] : [e, s]
447
- end
448
-
449
- ##
450
- # parse seq
451
- #
452
- # >>> seq("a b c")
453
- # ((seq, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
454
- #
455
- # >>> seq("a b? c")
456
- # ((seq, \[('id', 'a'), (opt, ('id', 'b')), ('id', 'c')\]), '')
457
- def seq(s)
458
- debug("seq") {"(#{s.inspect})"}
459
- args = []
460
- while !s.empty?
461
- e, ss = depth {diff(s)}
462
- debug {"=> diff returned #{[e, ss].inspect}"}
463
- unless e.to_s.empty?
464
- args << e
465
- s = ss
466
- else
467
- break;
468
- end
469
- end
470
- if args.length > 1
471
- [args.unshift(:seq), s]
472
- elsif args.length == 1
473
- args + [s]
474
- else
475
- ["", s]
476
- end
477
- end
478
-
479
- ##
480
- # parse diff
481
- #
482
- # >>> diff("a - b")
483
- # ((diff, \[('id', 'a'), ('id', 'b')\]), '')
484
- def diff(s)
485
- debug("diff") {"(#{s.inspect})"}
486
- e1, s = depth {postfix(s)}
487
- debug {"=> postfix returned #{[e1, s].inspect}"}
488
- unless e1.to_s.empty?
489
- unless s.empty?
490
- t, ss = depth {token(s)}
491
- debug {"diff #{[t, ss].inspect}"}
492
- if t.is_a?(Array) && t.first == :diff
493
- s = ss
494
- e2, s = primary(s)
495
- unless e2.to_s.empty?
496
- return [[:diff, e1, e2], s]
497
- else
498
- raise "Syntax Error"
499
- end
500
- end
501
- end
502
- end
503
- [e1, s]
504
- end
505
-
506
- ##
507
- # parse postfix
508
- #
509
- # >>> postfix("a b c")
510
- # (('id', 'a'), ' b c')
511
- #
512
- # >>> postfix("a? b c")
513
- # ((opt, ('id', 'a')), ' b c')
514
- def postfix(s)
515
- debug("postfix") {"(#{s.inspect})"}
516
- e, s = depth {primary(s)}
517
- debug {"=> primary returned #{[e, s].inspect}"}
518
- return ["", s] if e.to_s.empty?
519
- if !s.empty?
520
- t, ss = depth {token(s)}
521
- debug {"=> #{[t, ss].inspect}"}
522
- if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first)
523
- return [[t.first, e], ss]
524
- end
525
- end
526
- [e, s]
527
- end
528
-
529
- ##
530
- # parse primary
531
- #
532
- # >>> primary("a b c")
533
- # (('id', 'a'), ' b c')
534
- def primary(s)
535
- debug("primary") {"(#{s.inspect})"}
536
- t, s = depth {token(s)}
537
- debug {"=> token returned #{[t, s].inspect}"}
538
- if t.is_a?(Symbol) || t.is_a?(String)
539
- [t, s]
540
- elsif %w(range hex).map(&:to_sym).include?(t.first)
541
- [t, s]
542
- elsif t.first == :"("
543
- e, s = depth {ebnf(s)}
544
- debug {"=> ebnf returned #{[e, s].inspect}"}
545
- [e, s]
546
- else
547
- ["", s]
548
- end
549
- end
550
-
551
- ##
552
- # parse one token; return the token and the remaining string
553
- #
554
- # A token is represented as a tuple whose 1st item gives the type;
555
- # some types have additional info in the tuple.
556
- #
557
- # @example
558
- # >>> token("'abc' def")
559
- # (("'", 'abc'), ' def')
560
- #
561
- # >>> token("[0-9]")
562
- # ((range, '0-9'), '')
563
- # >>> token("#x00B7")
564
- # ((hex, '#x00B7'), '')
565
- # >>> token ("\[#x0300-#x036F\]")
566
- # ((range, '#x0300-#x036F'), '')
567
- # >>> token("\[^<>'{}|^`\]-\[#x00-#x20\]")
568
- # ((range, "^<>'{}|^`"), '-\[#x00-#x20\]')
569
- def token(s)
570
- s = s.strip
571
- case m = s[0,1]
572
- when '"', "'"
573
- l, s = s[1..-1].split(m, 2)
574
- [l, s]
575
- when '['
576
- l, s = s[1..-1].split(']', 2)
577
- [[:range, l], s]
578
- when '#'
579
- s.match(/(#\w+)(.*)$/)
580
- l, s = $1, $2
581
- [[:hex, l], s]
582
- when /[[:alpha:]]/
583
- s.match(/(\w+)(.*)$/)
584
- l, s = $1, $2
585
- [l.to_sym, s]
586
- when '@'
587
- s.match(/@(#\w+)(.*)$/)
588
- l, s = $1, $2
589
- [[:"@", l], s]
590
- when '-'
591
- [[:diff], s[1..-1]]
592
- when '?'
593
- [[:opt], s[1..-1]]
594
- when '|'
595
- [[:alt], s[1..-1]]
596
- when '+'
597
- [[:plus], s[1..-1]]
598
- when '*'
599
- [[:star], s[1..-1]]
600
- when /[\(\)]/
601
- [[m.to_sym], s[1..-1]]
602
- else
603
- raise "unrecognized token: #{s.inspect}"
604
- end
605
- end
606
-
607
- def depth
608
- @depth += 1
609
- ret = yield
610
- @depth -= 1
611
- ret
612
- end
613
-
614
- ##
615
- # Progress output when debugging
616
- #
617
- # @overload debug(node, message)
618
- # @param [String] node relative location in input
619
- # @param [String] message ("")
620
- #
621
- # @overload debug(message)
622
- # @param [String] message ("")
623
- #
624
- # @yieldreturn [String] added to message
625
- def debug(*args)
626
- return unless @options[:debug]
627
- options = args.last.is_a?(Hash) ? args.pop : {}
628
- depth = options[:depth] || @depth
629
- message = args.pop
630
- message = message.call if message.is_a?(Proc)
631
- args << message if message
632
- args << yield if block_given?
633
- message = "#{args.join(': ')}"
634
- str = "[#{@lineno}]#{' ' * depth}#{message}"
635
- @options[:debug] << str if @options[:debug].is_a?(Array)
636
- $stderr.puts(str) if @options[:debug] == true
637
- end
638
- end