rdf-turtle 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/History +7 -0
- data/{README.markdown → README.md} +11 -26
- data/VERSION +1 -1
- data/lib/rdf/turtle.rb +1 -2
- data/lib/rdf/turtle/meta.rb +1027 -1175
- data/lib/rdf/turtle/reader.rb +92 -79
- data/lib/rdf/turtle/terminals.rb +31 -31
- data/lib/rdf/turtle/writer.rb +2 -2
- metadata +127 -161
- data/lib/ebnf.rb +0 -638
- data/lib/rdf/ll1/lexer.rb +0 -475
- data/lib/rdf/ll1/parser.rb +0 -541
- data/lib/rdf/ll1/scanner.rb +0 -101
metadata
CHANGED
@@ -1,174 +1,152 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: rdf-turtle
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 1
|
8
|
-
- 0
|
9
|
-
- 0
|
10
|
-
version: 1.0.0
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.2
|
11
5
|
platform: ruby
|
12
|
-
authors:
|
6
|
+
authors:
|
13
7
|
- Gregg Kellogg
|
14
8
|
autorequire:
|
15
9
|
bindir: bin
|
16
10
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
11
|
+
date: 2013-03-24 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
21
14
|
name: rdf
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ! '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.0'
|
20
|
+
type: :runtime
|
22
21
|
prerelease: false
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ! '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: ebnf
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.2.1
|
33
34
|
type: :runtime
|
34
|
-
version_requirements: *id001
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
|
-
name: open-uri-cached
|
37
35
|
prerelease: false
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.2.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: open-uri-cached
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
48
47
|
version: 0.0.5
|
49
48
|
type: :development
|
50
|
-
version_requirements: *id002
|
51
|
-
- !ruby/object:Gem::Dependency
|
52
|
-
name: rspec
|
53
49
|
prerelease: false
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.0.5
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
64
61
|
version: 2.12.0
|
65
62
|
type: :development
|
66
|
-
version_requirements: *id003
|
67
|
-
- !ruby/object:Gem::Dependency
|
68
|
-
name: rdf-isomorphic
|
69
63
|
prerelease: false
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ! '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 2.12.0
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rdf-isomorphic
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ! '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
79
76
|
type: :development
|
80
|
-
version_requirements: *id004
|
81
|
-
- !ruby/object:Gem::Dependency
|
82
|
-
name: json-ld
|
83
77
|
prerelease: false
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: json-ld
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ! '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
93
90
|
type: :development
|
94
|
-
version_requirements: *id005
|
95
|
-
- !ruby/object:Gem::Dependency
|
96
|
-
name: yard
|
97
91
|
prerelease: false
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ! '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: yard
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ! '>='
|
102
|
+
- !ruby/object:Gem::Version
|
108
103
|
version: 0.8.3
|
109
104
|
type: :development
|
110
|
-
version_requirements: *id006
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: sxp
|
113
105
|
prerelease: false
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
segments:
|
121
|
-
- 0
|
122
|
-
version: "0"
|
123
|
-
type: :development
|
124
|
-
version_requirements: *id007
|
125
|
-
- !ruby/object:Gem::Dependency
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ! '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 0.8.3
|
111
|
+
- !ruby/object:Gem::Dependency
|
126
112
|
name: rdf-spec
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
- !ruby/object:Gem::Version
|
133
|
-
hash: 15
|
134
|
-
segments:
|
135
|
-
- 1
|
136
|
-
- 0
|
137
|
-
version: "1.0"
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '1.0'
|
138
118
|
type: :development
|
139
|
-
version_requirements: *id008
|
140
|
-
- !ruby/object:Gem::Dependency
|
141
|
-
name: rake
|
142
119
|
prerelease: false
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ! '>='
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '1.0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rake
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ! '>='
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
152
132
|
type: :development
|
153
|
-
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ! '>='
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
154
139
|
description: RDF::Turtle is an Turtle reader/writer for the RDF.rb library suite.
|
155
140
|
email: public-rdf-ruby@w3.org
|
156
141
|
executables: []
|
157
|
-
|
158
142
|
extensions: []
|
159
|
-
|
160
143
|
extra_rdoc_files: []
|
161
|
-
|
162
|
-
files:
|
144
|
+
files:
|
163
145
|
- AUTHORS
|
164
|
-
- README.
|
146
|
+
- README.md
|
165
147
|
- History
|
166
148
|
- UNLICENSE
|
167
149
|
- VERSION
|
168
|
-
- lib/ebnf.rb
|
169
|
-
- lib/rdf/ll1/lexer.rb
|
170
|
-
- lib/rdf/ll1/parser.rb
|
171
|
-
- lib/rdf/ll1/scanner.rb
|
172
150
|
- lib/rdf/turtle/format.rb
|
173
151
|
- lib/rdf/turtle/meta.rb
|
174
152
|
- lib/rdf/turtle/reader.rb
|
@@ -177,40 +155,28 @@ files:
|
|
177
155
|
- lib/rdf/turtle/writer.rb
|
178
156
|
- lib/rdf/turtle.rb
|
179
157
|
homepage: http://github.com/ruby-rdf/rdf-turtle
|
180
|
-
licenses:
|
158
|
+
licenses:
|
181
159
|
- Public Domain
|
160
|
+
metadata: {}
|
182
161
|
post_install_message:
|
183
162
|
rdoc_options: []
|
184
|
-
|
185
|
-
require_paths:
|
163
|
+
require_paths:
|
186
164
|
- lib
|
187
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
- !ruby/object:Gem::Version
|
192
|
-
hash: 53
|
193
|
-
segments:
|
194
|
-
- 1
|
195
|
-
- 8
|
196
|
-
- 1
|
165
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
166
|
+
requirements:
|
167
|
+
- - ! '>='
|
168
|
+
- !ruby/object:Gem::Version
|
197
169
|
version: 1.8.1
|
198
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
hash: 3
|
204
|
-
segments:
|
205
|
-
- 0
|
206
|
-
version: "0"
|
170
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
171
|
+
requirements:
|
172
|
+
- - ! '>='
|
173
|
+
- !ruby/object:Gem::Version
|
174
|
+
version: '0'
|
207
175
|
requirements: []
|
208
|
-
|
209
176
|
rubyforge_project: rdf-turtle
|
210
|
-
rubygems_version:
|
177
|
+
rubygems_version: 2.0.3
|
211
178
|
signing_key:
|
212
|
-
specification_version:
|
179
|
+
specification_version: 4
|
213
180
|
summary: Turtle reader/writer for Ruby.
|
214
181
|
test_files: []
|
215
|
-
|
216
182
|
has_rdoc: false
|
data/lib/ebnf.rb
DELETED
@@ -1,638 +0,0 @@
|
|
1
|
-
require 'strscan'
|
2
|
-
|
3
|
-
# Extended Bakus-Nour Form (EBNF), being the W3C variation is
|
4
|
-
# originaly defined in the
|
5
|
-
# [W3C XML 1.0 Spec](http://www.w3.org/TR/REC-xml/#sec-notation).
|
6
|
-
#
|
7
|
-
# This version attempts to be less strict than the strict definition
|
8
|
-
# to allow for coloquial variations (such as in the Turtle syntax).
|
9
|
-
#
|
10
|
-
# A rule takes the following form:
|
11
|
-
# \[1\] symbol ::= expression
|
12
|
-
#
|
13
|
-
# Comments include the content between '/*' and '*/'
|
14
|
-
#
|
15
|
-
# @see http://www.w3.org/2000/10/swap/grammar/ebnf2turtle.py
|
16
|
-
# @see http://www.w3.org/2000/10/swap/grammar/ebnf2bnf.n3
|
17
|
-
#
|
18
|
-
# Based on bnf2turtle by Dan Connolly.
|
19
|
-
#
|
20
|
-
# Motivation
|
21
|
-
# ----------
|
22
|
-
#
|
23
|
-
# Many specifications include grammars that look formal but are not
|
24
|
-
# actually checked, by machine, against test data sets. Debugging the
|
25
|
-
# grammar in the XML specification has been a long, tedious manual
|
26
|
-
# process. Only when the loop is closed between a fully formal grammar
|
27
|
-
# and a large test data set can we be confident that we have an accurate
|
28
|
-
# specification of a language (and even then, only the syntax of the language).
|
29
|
-
#
|
30
|
-
#
|
31
|
-
# The grammar in the [N3 design note][] has evolved based on the original
|
32
|
-
# manual transcription into a python recursive-descent parser and
|
33
|
-
# subsequent development of test cases. Rather than maintain the grammar
|
34
|
-
# and the parser independently, our [goal] is to formalize the language
|
35
|
-
# syntax sufficiently to replace the manual implementation with one
|
36
|
-
# derived mechanically from the specification.
|
37
|
-
#
|
38
|
-
#
|
39
|
-
# [N3 design note]: http://www.w3.org/DesignIssues/Notation3
|
40
|
-
#
|
41
|
-
# Related Work
|
42
|
-
# ------------
|
43
|
-
#
|
44
|
-
# Sean Palmer's [n3p announcement][] demonstrated the feasibility of the
|
45
|
-
# approach, though that work did not cover some aspects of N3.
|
46
|
-
#
|
47
|
-
# In development of the [SPARQL specification][], Eric Prud'hommeaux
|
48
|
-
# developed [Yacker][], which converts EBNF syntax to perl and C and C++
|
49
|
-
# yacc grammars. It includes an interactive facility for checking
|
50
|
-
# strings against the resulting grammars.
|
51
|
-
# Yosi Scharf used it in [cwm Release 1.1.0rc1][], which includes
|
52
|
-
# a SPAQRL parser that is *almost* completely mechanically generated.
|
53
|
-
#
|
54
|
-
# The N3/turtle output from yacker is lower level than the EBNF notation
|
55
|
-
# from the XML specification; it has the ?, +, and * operators compiled
|
56
|
-
# down to pure context-free rules, obscuring the grammar
|
57
|
-
# structure. Since that transformation is straightforwardly expressed in
|
58
|
-
# semantic web rules (see [bnf-rules.n3][]), it seems best to keep the RDF
|
59
|
-
# expression of the grammar in terms of the higher level EBNF
|
60
|
-
# constructs.
|
61
|
-
#
|
62
|
-
# [goal]: http://www.w3.org/2002/02/mid/1086902566.21030.1479.camel@dirk;list=public-cwm-bugs
|
63
|
-
# [n3p announcement]: http://lists.w3.org/Archives/Public/public-cwm-talk/2004OctDec/0029.html
|
64
|
-
# [Yacker]: http://www.w3.org/1999/02/26-modules/User/Yacker
|
65
|
-
# [SPARQL specification]: http://www.w3.org/TR/rdf-sparql-query/
|
66
|
-
# [Cwm Release 1.1.0rc1]: http://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html
|
67
|
-
# [bnf-rules.n3]: http://www.w3.org/2000/10/swap/grammar/bnf-rules.n3
|
68
|
-
#
|
69
|
-
# Open Issues and Future Work
|
70
|
-
# ---------------------------
|
71
|
-
#
|
72
|
-
# The yacker output also has the terminals compiled to elaborate regular
|
73
|
-
# expressions. The best strategy for dealing with lexical tokens is not
|
74
|
-
# yet clear. Many tokens in SPARQL are case insensitive; this is not yet
|
75
|
-
# captured formally.
|
76
|
-
#
|
77
|
-
# The schema for the EBNF vocabulary used here (``g:seq``, ``g:alt``, ...)
|
78
|
-
# is not yet published; it should be aligned with [swap/grammar/bnf][]
|
79
|
-
# and the [bnf2html.n3][] rules (and/or the style of linked XHTML grammar
|
80
|
-
# in the SPARQL and XML specificiations).
|
81
|
-
#
|
82
|
-
# It would be interesting to corroborate the claim in the SPARQL spec
|
83
|
-
# that the grammar is LL(1) with a mechanical proof based on N3 rules.
|
84
|
-
#
|
85
|
-
# [swap/grammar/bnf]: http://www.w3.org/2000/10/swap/grammar/bnf
|
86
|
-
# [bnf2html.n3]: http://www.w3.org/2000/10/swap/grammar/bnf2html.n3
|
87
|
-
#
|
88
|
-
# Background
|
89
|
-
# ----------
|
90
|
-
#
|
91
|
-
# The [N3 Primer] by Tim Berners-Lee introduces RDF and the Semantic
|
92
|
-
# web using N3, a teaching and scribbling language. Turtle is a subset
|
93
|
-
# of N3 that maps directly to (and from) the standard XML syntax for
|
94
|
-
# RDF.
|
95
|
-
#
|
96
|
-
# [N3 Primer]: http://www.w3.org/2000/10/swap/Primer.html
|
97
|
-
#
|
98
|
-
# @author Gregg Kellogg
|
99
|
-
class EBNF
|
100
|
-
class Rule
|
101
|
-
# @!attribute [r] sym for rule
|
102
|
-
# @return [Symbol]
|
103
|
-
attr_reader :sym
|
104
|
-
|
105
|
-
# @!attribute [r] id of rule
|
106
|
-
# @return [String]
|
107
|
-
attr_reader :id
|
108
|
-
|
109
|
-
# @!attribute [r] kind of rule
|
110
|
-
# @return [:rule, :token, or :pass]
|
111
|
-
attr_accessor :kind
|
112
|
-
|
113
|
-
# @!attribute [r] expr rule expression
|
114
|
-
# @return [Array]
|
115
|
-
attr_reader :expr
|
116
|
-
|
117
|
-
# @!attribute [r] orig original rule
|
118
|
-
# @return [String]
|
119
|
-
attr_accessor :orig
|
120
|
-
|
121
|
-
# @param [Integer] id
|
122
|
-
# @param [Symbol] sym
|
123
|
-
# @param [Array] expr
|
124
|
-
# @param [EBNF] ebnf
|
125
|
-
def initialize(id, sym, expr, ebnf)
|
126
|
-
@id, @sym, @expr, @ebnf = id, sym, expr, ebnf
|
127
|
-
end
|
128
|
-
|
129
|
-
# Serializes this rule to an S-Expression
|
130
|
-
# @return [String]
|
131
|
-
def to_sxp
|
132
|
-
[id, sym, kind, expr].to_sxp
|
133
|
-
end
|
134
|
-
|
135
|
-
# Serializes this rule to an Turtle
|
136
|
-
# @return [String]
|
137
|
-
def to_ttl
|
138
|
-
@ebnf.debug("to_ttl") {inspect}
|
139
|
-
comment = orig.strip.
|
140
|
-
gsub(/"""/, '\"\"\"').
|
141
|
-
gsub("\\", "\\\\").
|
142
|
-
sub(/^\"/, '\"').
|
143
|
-
sub(/\"$/m, '\"')
|
144
|
-
statements = [
|
145
|
-
%{:#{id} rdfs:label "#{id}"; rdf:value "#{sym}";},
|
146
|
-
%{ rdfs:comment #{comment.inspect};},
|
147
|
-
]
|
148
|
-
|
149
|
-
statements += ttl_expr(expr, kind == :token ? "re" : "g", 1, false)
|
150
|
-
"\n" + statements.join("\n")
|
151
|
-
end
|
152
|
-
|
153
|
-
def inspect
|
154
|
-
{:sym => sym, :id => id, kind => kind, :expr => expr}.inspect
|
155
|
-
end
|
156
|
-
|
157
|
-
private
|
158
|
-
def ttl_expr(expr, pfx, depth, is_obj = true)
|
159
|
-
indent = ' ' * depth
|
160
|
-
@ebnf.debug("ttl_expr", :depth => depth) {expr.inspect}
|
161
|
-
op = expr.shift if expr.is_a?(Array)
|
162
|
-
statements = []
|
163
|
-
|
164
|
-
if is_obj
|
165
|
-
bra, ket = "[ ", " ]"
|
166
|
-
else
|
167
|
-
bra = ket = ''
|
168
|
-
end
|
169
|
-
|
170
|
-
case op
|
171
|
-
when :seq, :alt, :diff
|
172
|
-
statements << %{#{indent}#{bra}#{pfx}:#{op} (}
|
173
|
-
expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
|
174
|
-
statements << %{#{indent} )#{ket}}
|
175
|
-
when :opt, :plus, :star
|
176
|
-
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
177
|
-
statements += ttl_expr(expr.first, pfx, depth + 1)
|
178
|
-
statements << %{#{indent} #{ket}} unless ket.empty?
|
179
|
-
when :"'"
|
180
|
-
statements << %{#{indent}"#{esc(expr)}"}
|
181
|
-
when :range
|
182
|
-
statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
|
183
|
-
when :hex
|
184
|
-
raise "didn't expect \" in expr" if expr.include?(:'"')
|
185
|
-
statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
|
186
|
-
else
|
187
|
-
if is_obj
|
188
|
-
statements << %{#{indent}#{expr.inspect}}
|
189
|
-
else
|
190
|
-
statements << %{#{indent}g:seq ( #{expr.inspect} )}
|
191
|
-
end
|
192
|
-
end
|
193
|
-
|
194
|
-
statements.last << " ." unless is_obj
|
195
|
-
@ebnf.debug("statements", :depth => depth) {statements.join("\n")}
|
196
|
-
statements
|
197
|
-
end
|
198
|
-
|
199
|
-
##
|
200
|
-
# turn an XML BNF character class into an N3 literal for that
|
201
|
-
# character class (less the outer quote marks)
|
202
|
-
#
|
203
|
-
# >>> cclass("^<>'{}|^`")
|
204
|
-
# "[^<>'{}|^`]"
|
205
|
-
# >>> cclass("#x0300-#x036F")
|
206
|
-
# "[\\u0300-\\u036F]"
|
207
|
-
# >>> cclass("#xC0-#xD6")
|
208
|
-
# "[\\u00C0-\\u00D6]"
|
209
|
-
# >>> cclass("#x370-#x37D")
|
210
|
-
# "[\\u0370-\\u037D]"
|
211
|
-
#
|
212
|
-
# as in: ECHAR ::= '\' [tbnrf\"']
|
213
|
-
# >>> cclass("tbnrf\\\"'")
|
214
|
-
# 'tbnrf\\\\\\"\''
|
215
|
-
#
|
216
|
-
# >>> cclass("^#x22#x5C#x0A#x0D")
|
217
|
-
# '^\\u0022\\\\\\u005C\\u000A\\u000D'
|
218
|
-
def cclass(txt)
|
219
|
-
'[' +
|
220
|
-
txt.gsub(/\#x[0-9a-fA-F]+/) do |hx|
|
221
|
-
hx = hx[2..-1]
|
222
|
-
if hx.length <= 4
|
223
|
-
"\\u#{'0' * (4 - hx.length)}#{hx}"
|
224
|
-
elsif hx.length <= 8
|
225
|
-
"\\U#{'0' * (8 - hx.length)}#{hx}"
|
226
|
-
end
|
227
|
-
end +
|
228
|
-
']'
|
229
|
-
end
|
230
|
-
end
|
231
|
-
|
232
|
-
# Abstract syntax tree from parse
|
233
|
-
attr_reader :ast
|
234
|
-
|
235
|
-
# Parse the string or file input generating an abstract syntax tree
|
236
|
-
# in S-Expressions (similar to SPARQL SSE)
|
237
|
-
#
|
238
|
-
# @param [#read, #to_s] input
|
239
|
-
# @param [Hash{Symbol => Object}] options
|
240
|
-
# @option options [Boolean, Array] :debug
|
241
|
-
# Output debug information to an array or STDOUT.
|
242
|
-
def initialize(input, options = {})
|
243
|
-
@options = options
|
244
|
-
@lineno, @depth = 1, 0
|
245
|
-
token = false
|
246
|
-
@ast = []
|
247
|
-
|
248
|
-
input = input.respond_to?(:read) ? input.read : input.to_s
|
249
|
-
scanner = StringScanner.new(input)
|
250
|
-
|
251
|
-
eachRule(scanner) do |r|
|
252
|
-
debug("rule string") {r.inspect}
|
253
|
-
case r
|
254
|
-
when /^@terminals/
|
255
|
-
# Switch mode to parsing tokens
|
256
|
-
token = true
|
257
|
-
when /^@pass\s*(.*)$/m
|
258
|
-
rule = depth {ruleParts("[0] " + r)}
|
259
|
-
rule.kind = :pass
|
260
|
-
rule.orig = r
|
261
|
-
@ast << rule
|
262
|
-
else
|
263
|
-
rule = depth {ruleParts(r)}
|
264
|
-
|
265
|
-
# all caps symbols are tokens. Once a token is seen
|
266
|
-
# we don't go back
|
267
|
-
token ||= !!(rule.sym.to_s =~ /^[A-Z_]+$/)
|
268
|
-
rule.kind = token ? :token : :rule
|
269
|
-
rule.orig = r
|
270
|
-
@ast << rule
|
271
|
-
end
|
272
|
-
end
|
273
|
-
end
|
274
|
-
|
275
|
-
##
|
276
|
-
# Write out parsed syntax string as an S-Expression
|
277
|
-
# @return [String]
|
278
|
-
def to_sxp
|
279
|
-
begin
|
280
|
-
require 'sxp'
|
281
|
-
SXP::Generator.string(ast)
|
282
|
-
rescue LoadError
|
283
|
-
ast.to_sxp
|
284
|
-
end
|
285
|
-
end
|
286
|
-
|
287
|
-
##
|
288
|
-
# Write out syntax tree as Turtle
|
289
|
-
# @param [String] prefix for language
|
290
|
-
# @param [String] ns URI for language
|
291
|
-
# @return [String]
|
292
|
-
def to_ttl(prefix, ns)
|
293
|
-
token = false
|
294
|
-
|
295
|
-
unless ast.empty?
|
296
|
-
[
|
297
|
-
"@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.",
|
298
|
-
"@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.",
|
299
|
-
"@prefix #{prefix}: <#{ns}>.",
|
300
|
-
"@prefix : <#{ns}>.",
|
301
|
-
"@prefix re: <http://www.w3.org/2000/10/swap/grammar/regex#>.",
|
302
|
-
"@prefix g: <http://www.w3.org/2000/10/swap/grammar/ebnf#>.",
|
303
|
-
"",
|
304
|
-
":language rdfs:isDefinedBy <>; g:start :#{ast.first.id}.",
|
305
|
-
"",
|
306
|
-
]
|
307
|
-
end.join("\n") +
|
308
|
-
|
309
|
-
ast.
|
310
|
-
select {|a| [:rule, :token].include?(a.kind)}.
|
311
|
-
map(&:to_ttl).
|
312
|
-
join("\n")
|
313
|
-
end
|
314
|
-
|
315
|
-
##
|
316
|
-
# Iterate over rule strings.
|
317
|
-
# a line that starts with '\[' or '@' starts a new rule
|
318
|
-
#
|
319
|
-
# @param [StringScanner] scanner
|
320
|
-
# @yield rule_string
|
321
|
-
# @yieldparam [String] rule_string
|
322
|
-
def eachRule(scanner)
|
323
|
-
cur_lineno = 1
|
324
|
-
r = ''
|
325
|
-
until scanner.eos?
|
326
|
-
case
|
327
|
-
when s = scanner.scan(%r(\s+)m)
|
328
|
-
# Eat whitespace
|
329
|
-
cur_lineno += s.count("\n")
|
330
|
-
#debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" }
|
331
|
-
when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m)
|
332
|
-
# Eat comments
|
333
|
-
cur_lineno += s.count("\n")
|
334
|
-
debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
|
335
|
-
when s = scanner.scan(%r(^@terminals))
|
336
|
-
#debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" }
|
337
|
-
yield(r) unless r.empty?
|
338
|
-
@lineno = cur_lineno
|
339
|
-
yield(s)
|
340
|
-
r = ''
|
341
|
-
when s = scanner.scan(/@pass/)
|
342
|
-
# Found rule start, if we've already collected a rule, yield it
|
343
|
-
#debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" }
|
344
|
-
yield r unless r.empty?
|
345
|
-
@lineno = cur_lineno
|
346
|
-
r = s
|
347
|
-
when s = scanner.scan(/\[(?=\w+\])/)
|
348
|
-
# Found rule start, if we've already collected a rule, yield it
|
349
|
-
yield r unless r.empty?
|
350
|
-
#debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" }
|
351
|
-
@lineno = cur_lineno
|
352
|
-
r = s
|
353
|
-
else
|
354
|
-
# Collect until end of line, or start of comment
|
355
|
-
s = scanner.scan_until(%r((?:/\*)|$)m)
|
356
|
-
cur_lineno += s.count("\n")
|
357
|
-
#debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" }
|
358
|
-
r += s
|
359
|
-
end
|
360
|
-
end
|
361
|
-
yield r unless r.empty?
|
362
|
-
end
|
363
|
-
|
364
|
-
##
|
365
|
-
# Parse a rule into a rule number, a symbol and an expression
|
366
|
-
#
|
367
|
-
# @param [String] rule
|
368
|
-
# @return [Rule]
|
369
|
-
def ruleParts(rule)
|
370
|
-
num_sym, expr = rule.split('::=', 2).map(&:strip)
|
371
|
-
num, sym = num_sym.split(']', 2).map(&:strip)
|
372
|
-
num = num[1..-1]
|
373
|
-
r = Rule.new(sym && sym.to_sym, num, ebnf(expr).first, self)
|
374
|
-
debug("ruleParts") { r.inspect }
|
375
|
-
r
|
376
|
-
end
|
377
|
-
|
378
|
-
##
|
379
|
-
# Parse a string into an expression tree and a remaining string
|
380
|
-
#
|
381
|
-
# @example
|
382
|
-
# >>> ebnf("a b c")
|
383
|
-
# ((seq, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
|
384
|
-
#
|
385
|
-
# >>> ebnf("a? b+ c*")
|
386
|
-
# ((seq, \[(opt, ('id', 'a')), (plus, ('id', 'b')), ('*', ('id', 'c'))\]), '')
|
387
|
-
#
|
388
|
-
# >>> ebnf(" | x xlist")
|
389
|
-
# ((alt, \[(seq, \[\]), (seq, \[('id', 'x'), ('id', 'xlist')\])\]), '')
|
390
|
-
#
|
391
|
-
# >>> ebnf("a | (b - c)")
|
392
|
-
# ((alt, \[('id', 'a'), (diff, \[('id', 'b'), ('id', 'c')\])\]), '')
|
393
|
-
#
|
394
|
-
# >>> ebnf("a b | c d")
|
395
|
-
# ((alt, \[(seq, \[('id', 'a'), ('id', 'b')\]), (seq, \[('id', 'c'), ('id', 'd')\])\]), '')
|
396
|
-
#
|
397
|
-
# >>> ebnf("a | b | c")
|
398
|
-
# ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
|
399
|
-
#
|
400
|
-
# >>> ebnf("a) b c")
|
401
|
-
# (('id', 'a'), ' b c')
|
402
|
-
#
|
403
|
-
# >>> ebnf("BaseDecl? PrefixDecl*")
|
404
|
-
# ((seq, \[(opt, ('id', 'BaseDecl')), ('*', ('id', 'PrefixDecl'))\]), '')
|
405
|
-
#
|
406
|
-
# >>> ebnf("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
|
407
|
-
# ((alt, \[('id', 'NCCHAR1'), ("'", diff), (range, '0-9'), (hex, '#x00B7'), (range, '#x0300-#x036F'), (range, '#x203F-#x2040')\]), '')
|
408
|
-
#
|
409
|
-
# @param [String] s
|
410
|
-
# @return [Array]
|
411
|
-
def ebnf(s)
|
412
|
-
debug("ebnf") {"(#{s.inspect})"}
|
413
|
-
e, s = depth {alt(s)}
|
414
|
-
debug {"=> alt returned #{[e, s].inspect}"}
|
415
|
-
unless s.empty?
|
416
|
-
t, ss = depth {token(s)}
|
417
|
-
debug {"=> token returned #{[t, ss].inspect}"}
|
418
|
-
return [e, ss] if t.is_a?(Array) && t.first == :")"
|
419
|
-
end
|
420
|
-
[e, s]
|
421
|
-
end
|
422
|
-
|
423
|
-
##
|
424
|
-
# Parse alt
|
425
|
-
# >>> alt("a | b | c")
|
426
|
-
# ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
|
427
|
-
# @param [String] s
|
428
|
-
# @return [Array]
|
429
|
-
def alt(s)
|
430
|
-
debug("alt") {"(#{s.inspect})"}
|
431
|
-
args = []
|
432
|
-
while !s.empty?
|
433
|
-
e, s = depth {seq(s)}
|
434
|
-
debug {"=> seq returned #{[e, s].inspect}"}
|
435
|
-
if e.to_s.empty?
|
436
|
-
break unless args.empty?
|
437
|
-
e = [:seq, []] # empty sequence
|
438
|
-
end
|
439
|
-
args << e
|
440
|
-
unless s.empty?
|
441
|
-
t, ss = depth {token(s)}
|
442
|
-
break unless t[0] == :alt
|
443
|
-
s = ss
|
444
|
-
end
|
445
|
-
end
|
446
|
-
args.length > 1 ? [args.unshift(:alt), s] : [e, s]
|
447
|
-
end
|
448
|
-
|
449
|
-
##
|
450
|
-
# parse seq
|
451
|
-
#
|
452
|
-
# >>> seq("a b c")
|
453
|
-
# ((seq, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
|
454
|
-
#
|
455
|
-
# >>> seq("a b? c")
|
456
|
-
# ((seq, \[('id', 'a'), (opt, ('id', 'b')), ('id', 'c')\]), '')
|
457
|
-
def seq(s)
|
458
|
-
debug("seq") {"(#{s.inspect})"}
|
459
|
-
args = []
|
460
|
-
while !s.empty?
|
461
|
-
e, ss = depth {diff(s)}
|
462
|
-
debug {"=> diff returned #{[e, ss].inspect}"}
|
463
|
-
unless e.to_s.empty?
|
464
|
-
args << e
|
465
|
-
s = ss
|
466
|
-
else
|
467
|
-
break;
|
468
|
-
end
|
469
|
-
end
|
470
|
-
if args.length > 1
|
471
|
-
[args.unshift(:seq), s]
|
472
|
-
elsif args.length == 1
|
473
|
-
args + [s]
|
474
|
-
else
|
475
|
-
["", s]
|
476
|
-
end
|
477
|
-
end
|
478
|
-
|
479
|
-
##
|
480
|
-
# parse diff
|
481
|
-
#
|
482
|
-
# >>> diff("a - b")
|
483
|
-
# ((diff, \[('id', 'a'), ('id', 'b')\]), '')
|
484
|
-
def diff(s)
|
485
|
-
debug("diff") {"(#{s.inspect})"}
|
486
|
-
e1, s = depth {postfix(s)}
|
487
|
-
debug {"=> postfix returned #{[e1, s].inspect}"}
|
488
|
-
unless e1.to_s.empty?
|
489
|
-
unless s.empty?
|
490
|
-
t, ss = depth {token(s)}
|
491
|
-
debug {"diff #{[t, ss].inspect}"}
|
492
|
-
if t.is_a?(Array) && t.first == :diff
|
493
|
-
s = ss
|
494
|
-
e2, s = primary(s)
|
495
|
-
unless e2.to_s.empty?
|
496
|
-
return [[:diff, e1, e2], s]
|
497
|
-
else
|
498
|
-
raise "Syntax Error"
|
499
|
-
end
|
500
|
-
end
|
501
|
-
end
|
502
|
-
end
|
503
|
-
[e1, s]
|
504
|
-
end
|
505
|
-
|
506
|
-
##
|
507
|
-
# parse postfix
|
508
|
-
#
|
509
|
-
# >>> postfix("a b c")
|
510
|
-
# (('id', 'a'), ' b c')
|
511
|
-
#
|
512
|
-
# >>> postfix("a? b c")
|
513
|
-
# ((opt, ('id', 'a')), ' b c')
|
514
|
-
def postfix(s)
|
515
|
-
debug("postfix") {"(#{s.inspect})"}
|
516
|
-
e, s = depth {primary(s)}
|
517
|
-
debug {"=> primary returned #{[e, s].inspect}"}
|
518
|
-
return ["", s] if e.to_s.empty?
|
519
|
-
if !s.empty?
|
520
|
-
t, ss = depth {token(s)}
|
521
|
-
debug {"=> #{[t, ss].inspect}"}
|
522
|
-
if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first)
|
523
|
-
return [[t.first, e], ss]
|
524
|
-
end
|
525
|
-
end
|
526
|
-
[e, s]
|
527
|
-
end
|
528
|
-
|
529
|
-
##
|
530
|
-
# parse primary
|
531
|
-
#
|
532
|
-
# >>> primary("a b c")
|
533
|
-
# (('id', 'a'), ' b c')
|
534
|
-
def primary(s)
|
535
|
-
debug("primary") {"(#{s.inspect})"}
|
536
|
-
t, s = depth {token(s)}
|
537
|
-
debug {"=> token returned #{[t, s].inspect}"}
|
538
|
-
if t.is_a?(Symbol) || t.is_a?(String)
|
539
|
-
[t, s]
|
540
|
-
elsif %w(range hex).map(&:to_sym).include?(t.first)
|
541
|
-
[t, s]
|
542
|
-
elsif t.first == :"("
|
543
|
-
e, s = depth {ebnf(s)}
|
544
|
-
debug {"=> ebnf returned #{[e, s].inspect}"}
|
545
|
-
[e, s]
|
546
|
-
else
|
547
|
-
["", s]
|
548
|
-
end
|
549
|
-
end
|
550
|
-
|
551
|
-
##
|
552
|
-
# parse one token; return the token and the remaining string
|
553
|
-
#
|
554
|
-
# A token is represented as a tuple whose 1st item gives the type;
|
555
|
-
# some types have additional info in the tuple.
|
556
|
-
#
|
557
|
-
# @example
|
558
|
-
# >>> token("'abc' def")
|
559
|
-
# (("'", 'abc'), ' def')
|
560
|
-
#
|
561
|
-
# >>> token("[0-9]")
|
562
|
-
# ((range, '0-9'), '')
|
563
|
-
# >>> token("#x00B7")
|
564
|
-
# ((hex, '#x00B7'), '')
|
565
|
-
# >>> token ("\[#x0300-#x036F\]")
|
566
|
-
# ((range, '#x0300-#x036F'), '')
|
567
|
-
# >>> token("\[^<>'{}|^`\]-\[#x00-#x20\]")
|
568
|
-
# ((range, "^<>'{}|^`"), '-\[#x00-#x20\]')
|
569
|
-
def token(s)
|
570
|
-
s = s.strip
|
571
|
-
case m = s[0,1]
|
572
|
-
when '"', "'"
|
573
|
-
l, s = s[1..-1].split(m, 2)
|
574
|
-
[l, s]
|
575
|
-
when '['
|
576
|
-
l, s = s[1..-1].split(']', 2)
|
577
|
-
[[:range, l], s]
|
578
|
-
when '#'
|
579
|
-
s.match(/(#\w+)(.*)$/)
|
580
|
-
l, s = $1, $2
|
581
|
-
[[:hex, l], s]
|
582
|
-
when /[[:alpha:]]/
|
583
|
-
s.match(/(\w+)(.*)$/)
|
584
|
-
l, s = $1, $2
|
585
|
-
[l.to_sym, s]
|
586
|
-
when '@'
|
587
|
-
s.match(/@(#\w+)(.*)$/)
|
588
|
-
l, s = $1, $2
|
589
|
-
[[:"@", l], s]
|
590
|
-
when '-'
|
591
|
-
[[:diff], s[1..-1]]
|
592
|
-
when '?'
|
593
|
-
[[:opt], s[1..-1]]
|
594
|
-
when '|'
|
595
|
-
[[:alt], s[1..-1]]
|
596
|
-
when '+'
|
597
|
-
[[:plus], s[1..-1]]
|
598
|
-
when '*'
|
599
|
-
[[:star], s[1..-1]]
|
600
|
-
when /[\(\)]/
|
601
|
-
[[m.to_sym], s[1..-1]]
|
602
|
-
else
|
603
|
-
raise "unrecognized token: #{s.inspect}"
|
604
|
-
end
|
605
|
-
end
|
606
|
-
|
607
|
-
def depth
|
608
|
-
@depth += 1
|
609
|
-
ret = yield
|
610
|
-
@depth -= 1
|
611
|
-
ret
|
612
|
-
end
|
613
|
-
|
614
|
-
##
|
615
|
-
# Progress output when debugging
|
616
|
-
#
|
617
|
-
# @overload debug(node, message)
|
618
|
-
# @param [String] node relative location in input
|
619
|
-
# @param [String] message ("")
|
620
|
-
#
|
621
|
-
# @overload debug(message)
|
622
|
-
# @param [String] message ("")
|
623
|
-
#
|
624
|
-
# @yieldreturn [String] added to message
|
625
|
-
def debug(*args)
|
626
|
-
return unless @options[:debug]
|
627
|
-
options = args.last.is_a?(Hash) ? args.pop : {}
|
628
|
-
depth = options[:depth] || @depth
|
629
|
-
message = args.pop
|
630
|
-
message = message.call if message.is_a?(Proc)
|
631
|
-
args << message if message
|
632
|
-
args << yield if block_given?
|
633
|
-
message = "#{args.join(': ')}"
|
634
|
-
str = "[#{@lineno}]#{' ' * depth}#{message}"
|
635
|
-
@options[:debug] << str if @options[:debug].is_a?(Array)
|
636
|
-
$stderr.puts(str) if @options[:debug] == true
|
637
|
-
end
|
638
|
-
end
|