sparql 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,186 @@
1
# The only external constant used below is EBNF::LL1::Lexer::UCHAR, which is
# provided by the `ebnf` gem, so that is the library that must be loaded.
require 'ebnf/ll1/lexer'

module SPARQL::Grammar
  ##
  # Regular expressions and string tables describing the terminal symbols of
  # the SPARQL grammar, used for lexical analysis.
  #
  # NOTE(review): the bare numbers in the comment above each constant look
  # like grammar production numbers (an "s" suffix presumably marking a
  # number taken from a different grammar revision) -- confirm against the
  # grammar this file was derived from.
  module Terminals
    # Definitions of token regular expressions used for lexical analysis

    if RUBY_VERSION >= '1.9'
      ##
      # Unicode regular expressions for Ruby 1.9+ with the Oniguruma engine.
      # The heredoc is only split over several lines for readability; all
      # white space is stripped before the pattern is compiled.
      U_CHARS1         = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
                           [\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]|
                           [\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]|
                           [\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]|
                           [\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\u{10000}-\\u{EFFFF}]
                         EOS
      U_CHARS2         = Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]")
      IRI_RANGE        = Regexp.compile("[[^<>\"{}|^`\\\\]&&[^\\x00-\\x20]]")
    else
      ##
      # UTF-8 regular expressions for Ruby 1.8.x.
      # Each alternative spells out the UTF-8 byte sequences of the code point
      # range named in the trailing `(?# ...)` regexp comment.
      U_CHARS1         = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
                           \\xC3[\\x80-\\x96]|                                (?# [\\u00C0-\\u00D6]|)
                           \\xC3[\\x98-\\xB6]|                                (?# [\\u00D8-\\u00F6]|)
                           \\xC3[\\xB8-\\xBF]|[\\xC4-\\xCB][\\x80-\\xBF]|     (?# [\\u00F8-\\u02FF]|)
                           \\xCD[\\xB0-\\xBD]|                                (?# [\\u0370-\\u037D]|)
                           \\xCD\\xBF|[\\xCE-\\xDF][\\x80-\\xBF]|             (?# [\\u037F-\\u1FFF]|)
                           \\xE0[\\xA0-\\xBF][\\x80-\\xBF]|                   (?# ...)
                           \\xE1[\\x80-\\xBF][\\x80-\\xBF]|                   (?# ...)
                           \\xE2\\x80[\\x8C-\\x8D]|                           (?# [\\u200C-\\u200D]|)
                           \\xE2\\x81[\\xB0-\\xBF]|                           (?# [\\u2070-\\u218F]|)
                           \\xE2[\\x82-\\x85][\\x80-\\xBF]|                   (?# ...)
                           \\xE2\\x86[\\x80-\\x8F]|                           (?# ...)
                           \\xE2[\\xB0-\\xBE][\\x80-\\xBF]|                   (?# [\\u2C00-\\u2FEF]|)
                           \\xE2\\xBF[\\x80-\\xAF]|                           (?# ...)
                           \\xE3\\x80[\\x81-\\xBF]|                           (?# [\\u3001-\\uD7FF]|)
                           \\xE3[\\x81-\\xBF][\\x80-\\xBF]|                   (?# ...)
                           [\\xE4-\\xEC][\\x80-\\xBF][\\x80-\\xBF]|           (?# ...)
                           \\xED[\\x80-\\x9F][\\x80-\\xBF]|                   (?# ...)
                           \\xEF[\\xA4-\\xB6][\\x80-\\xBF]|                   (?# [\\uF900-\\uFDCF]|)
                           \\xEF\\xB7[\\x80-\\x8F]|                           (?# ...)
                           \\xEF\\xB7[\\xB0-\\xBF]|                           (?# [\\uFDF0-\\uFFFD]|)
                           \\xEF[\\xB8-\\xBE][\\x80-\\xBF]|                   (?# ...)
                           \\xEF\\xBF[\\x80-\\xBD]|                           (?# ...)
                           \\xF0[\\x90-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|      (?# [\\u{10000}-\\u{EFFFF}])
                           [\\xF1-\\xF2][\\x80-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|
                           \\xF3[\\x80-\\xAF][\\x80-\\xBF][\\x80-\\xBF]       (?# ...)
                         EOS
      U_CHARS2         = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
                           \\xC2\\xB7|                                        (?# \\u00B7|)
                           \\xCC[\\x80-\\xBF]|\\xCD[\\x80-\\xAF]|             (?# [\\u0300-\\u036F]|)
                           \\xE2\\x80\\xBF|\\xE2\\x81\\x80                    (?# [\\u203F-\\u2040])
                         EOS
      IRI_RANGE        = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
                           \\x21|                                             (?# ")
                           [\\x23-\\x3b]|\\x3d|                               (?# < & >)
                           [\\x3f-\\x5b]|\\x5d|\\x5f|                         (?# \ ^ `)
                           [\\x61-\\x7a]|                                     (?# { } |)
                           [\\x7e-\\xff]
                         EOS
    end

    # 26
    UCHAR                = EBNF::LL1::Lexer::UCHAR
    # 170s
    PERCENT              = /%[0-9A-Fa-f]{2}/
    # 172s
    PN_LOCAL_ESC         = /\\[_~\.\-\!$\&'\(\)\*\+,;=:\/\?\#@%]/
    # 169s
    PLX                  = /#{PERCENT}|#{PN_LOCAL_ESC}/
    # 153
    PN_CHARS_BASE        = /[A-Z]|[a-z]|#{U_CHARS1}/
    # 154
    PN_CHARS_U           = /_|#{PN_CHARS_BASE}/
    # 155
    VARNAME              = /(?:[0-9]|#{PN_CHARS_U})
                            (?:[0-9]|#{PN_CHARS_U}|#{U_CHARS2})*/x
    # 156
    PN_CHARS             = /-|[0-9]|#{PN_CHARS_U}|#{U_CHARS2}/
    # Optional tails shared by the name productions below; a tail may contain
    # dots but must not end with one.
    PN_LOCAL_BODY        = /(?:(?:\.|:|#{PN_CHARS}|#{PLX})*(?:#{PN_CHARS}|:|#{PLX}))?/
    PN_CHARS_BODY        = /(?:(?:\.|#{PN_CHARS})*#{PN_CHARS})?/
    # 157
    PN_PREFIX            = /#{PN_CHARS_BASE}#{PN_CHARS_BODY}/
    # 158
    PN_LOCAL             = /(?:[0-9]|:|#{PN_CHARS_U}|#{PLX})#{PN_LOCAL_BODY}/
    # 144
    EXPONENT             = /[eE][+-]?[0-9]+/
    # 149
    ECHAR                = /\\[tbnrf\\"']/
    # 18
    IRIREF               = /<(?:#{IRI_RANGE}|#{UCHAR})*>/
    # 129
    PNAME_NS             = /#{PN_PREFIX}?:/
    # 130
    PNAME_LN             = /#{PNAME_NS}#{PN_LOCAL}/
    # 131
    # The label may contain dots but must end in a PN_CHARS character, so the
    # "." terminating a triple such as `?s ?p _:b.` is not swallowed by the
    # label. Capture group 1 is the label without the "_:" prefix.
    BLANK_NODE_LABEL     = /_:((?:[0-9]|#{PN_CHARS_U})(?:(?:#{PN_CHARS}|\.)*#{PN_CHARS})?)/
    # 132
    VAR1                 = /\?#{VARNAME}/
    # 133
    VAR2                 = /\$#{VARNAME}/
    # 134
    LANGTAG              = /@[a-zA-Z]+(?:-[a-zA-Z0-9]+)*/
    # 135
    INTEGER              = /[0-9]+/
    # 136
    DECIMAL              = /(?:[0-9]*\.[0-9]+)/
    # 137
    DOUBLE               = /(?:[0-9]+\.[0-9]*#{EXPONENT}|\.?[0-9]+#{EXPONENT})/
    # 138
    INTEGER_POSITIVE     = /(\+)([0-9]+)/
    # 139
    DECIMAL_POSITIVE     = /(\+)([0-9]*\.[0-9]+)/
    # 140
    DOUBLE_POSITIVE      = /(\+)([0-9]+\.[0-9]*#{EXPONENT}|\.?[0-9]+#{EXPONENT})/
    # 141
    INTEGER_NEGATIVE     = /(\-)([0-9]+)/
    # 142
    DECIMAL_NEGATIVE     = /(\-)([0-9]*\.[0-9]+)/
    # 143
    DOUBLE_NEGATIVE      = /(\-)([0-9]+\.[0-9]*#{EXPONENT}|\.?[0-9]+#{EXPONENT})/
    # 145
    STRING_LITERAL1      = /'([^\'\\\n\r]|#{ECHAR}|#{UCHAR})*'/
    # 146
    STRING_LITERAL2      = /"([^\"\\\n\r]|#{ECHAR}|#{UCHAR})*"/
    # 147
    STRING_LITERAL_LONG1 = /'''((?:'|'')?(?:[^'\\]|#{ECHAR}|#{UCHAR}))*'''/m
    # 148
    STRING_LITERAL_LONG2 = /"""((?:"|"")?(?:[^"\\]|#{ECHAR}|#{UCHAR}))*"""/m

    # 151
    # A single white space character: space, tab, carriage return or line
    # feed. The pattern is not compiled with /x, so it must not contain any
    # padding: a trailing blank would turn the last alternative into
    # "line feed followed by a space".
    WS                   = / |\t|\r|\n/
    # 150
    NIL                  = /\(#{WS}*\)/
    # 152
    ANON                 = /\[#{WS}*\]/

    # String terminals, case insensitive
    # Longer alternatives are listed before their own prefixes (e.g. ASK
    # before AS, GROUP_CONCAT before GROUP) because the first matching
    # alternative wins.
    STR_EXPR = %r(ABS|ADD|ALL|ASC|ASK|AS|BASE|BINDINGS|BIND
                 |BNODE|BOUND|BY|CEIL|CLEAR|COALESCE|CONCAT
                 |CONSTRUCT|CONTAINS|COPY|COUNT|CREATE|DATATYPE|DAY
                 |DEFAULT|DELETE\sDATA|DELETE\sWHERE|DELETE
                 |DESCRIBE|DESC|DISTINCT|DROP|ENCODE_FOR_URI|EXISTS
                 |FILTER|FLOOR|FROM|GRAPH|GROUP_CONCAT|GROUP|HAVING
                 |HOURS|IF|INSERT\sDATA|INSERT|INTO|IN|IRI
                 |LANGMATCHES|LANGTAG|LANG|LCASE|LIMIT|LOAD
                 |MAX|MD5|MINUS|MINUTES|MIN|MONTH|MOVE
                 |NAMED|NOT|NOW|OFFSET|OPTIONAL
                 |ORDER|PREFIX|RAND|REDUCED|REGEX|ROUND|SAMPLE|SECONDS
                 |SELECT|SEPARATOR|SERVICE
                 |SHA1|SHA224|SHA256|SHA384|SHA512
                 |STRDT|STRENDS|STRLANG|STRLEN|STRSTARTS|SUBSTR|STR|SUM
                 |TIMEZONE|TO|TZ|UCASE|UNDEF|UNION|URI|USING
                 |WHERE|WITH|YEAR
                 |isBLANK|isIRI|isURI|isLITERAL|isNUMERIC|sameTerm
                 |true
                 |false
                 |&&|!=|!|<=|>=|\^\^|\|\||[\(\),.;\[\]\{\}\+\-=<>\?\^\|\*\/a]
               )xi

    # Map terminals to canonical form
    # Keys are the down-cased spelling of each terminal, values the canonical
    # spelling. `inject` is used (rather than a newer Enumerable helper)
    # because this file still carries a Ruby 1.8 code path.
    STR_MAP = (%w{ABS ADD ALL ASC ASK AS BASE BINDINGS BIND
                  BNODE BOUND BY CEIL CLEAR COALESCE CONCAT
                  CONSTRUCT CONTAINS COPY COUNT CREATE DATATYPE DAY
                  DEFAULT DELETE
                  DESCRIBE DESC DISTINCT DROP ENCODE_FOR_URI EXISTS
                  FILTER FLOOR FROM GRAPH GROUP_CONCAT GROUP HAVING
                  HOURS IF INSERT INTO IN IRI
                  LANGMATCHES LANGTAG LANG LCASE LIMIT LOAD
                  MAX MD5 MINUS MINUTES MIN MONTH MOVE
                  NAMED NOT NOW OFFSET OPTIONAL
                  ORDER PREFIX RAND REDUCED REGEX ROUND SAMPLE SECONDS
                  SELECT SEPARATOR SERVICE
                  SHA1 SHA224 SHA256 SHA384 SHA512
                  STRDT STRENDS STRLANG STRLEN STRSTARTS SUBSTR STR SUM
                  TIMEZONE TO TZ UCASE UNDEF UNION URI USING
                  WHERE WITH YEAR
                  isBLANK isIRI isURI isLITERAL isNUMERIC sameTerm
                  true
                  false
                } + [
                  "DELETE DATA",
                  "DELETE WHERE",
                  "INSERT DATA",
                ]).inject({}) {|memo, t| memo[t.downcase] = t; memo}.freeze
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sparql
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2013-02-08 00:00:00.000000000 Z
14
+ date: 2013-03-05 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: rdf
@@ -29,6 +29,22 @@ dependencies:
29
29
  - - ! '>='
30
30
  - !ruby/object:Gem::Version
31
31
  version: 1.0.1
32
+ - !ruby/object:Gem::Dependency
33
+ name: ebnf
34
+ requirement: !ruby/object:Gem::Requirement
35
+ none: false
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: 0.1.1
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 0.1.1
32
48
  - !ruby/object:Gem::Dependency
33
49
  name: builder
34
50
  requirement: !ruby/object:Gem::Requirement
@@ -295,7 +311,7 @@ extra_rdoc_files: []
295
311
  files:
296
312
  - AUTHORS
297
313
  - CREDITS
298
- - README.markdown
314
+ - README.md
299
315
  - UNLICENSE
300
316
  - VERSION
301
317
  - bin/sparql
@@ -358,9 +374,9 @@ files:
358
374
  - lib/sparql/algebra/version.rb
359
375
  - lib/sparql/algebra.rb
360
376
  - lib/sparql/extensions.rb
361
- - lib/sparql/grammar/lexer.rb
362
- - lib/sparql/grammar/parser/meta.rb
363
- - lib/sparql/grammar/parser.rb
377
+ - lib/sparql/grammar/meta.rb
378
+ - lib/sparql/grammar/parser11.rb
379
+ - lib/sparql/grammar/terminals11.rb
364
380
  - lib/sparql/grammar.rb
365
381
  - lib/sparql/results.rb
366
382
  - lib/sparql/version.rb
@@ -1,613 +0,0 @@
1
require 'strscan'    unless defined?(StringScanner)
require 'bigdecimal' unless defined?(BigDecimal)

module SPARQL; module Grammar
  ##
  # A lexical analyzer for the SPARQL 1.0 grammar.
  #
  # Note that productions \[80\]-\[85\] have been incorporated directly into
  # \[77\], \[78\], \[79\].
  #
  # @example Tokenizing a SPARQL query string
  #   query = "SELECT * WHERE { ?s ?p ?o }"
  #   lexer = SPARQL::Grammar::Lexer.tokenize(query)
  #   lexer.each_token do |token|
  #     puts token.inspect
  #   end
  #
  # @example Handling error conditions
  #   begin
  #     SPARQL::Grammar::Lexer.tokenize(query)
  #   rescue SPARQL::Grammar::Lexer::Error => error
  #     warn error.inspect
  #   end
  #
  # @see http://www.w3.org/TR/rdf-sparql-query/#grammar
  # @see http://en.wikipedia.org/wiki/Lexical_analysis
  class Lexer
    include Enumerable

    # Maps each two-character string escape sequence to the character it
    # denotes.
    ESCAPE_CHARS = {
      '\t'   => "\t",    # \u0009 (tab)
      '\n'   => "\n",    # \u000A (line feed)
      '\r'   => "\r",    # \u000D (carriage return)
      '\b'   => "\b",    # \u0008 (backspace)
      '\f'   => "\f",    # \u000C (form feed)
      '\\"'  => '"',     # \u0022 (quotation mark, double quote mark)
      '\\\'' => '\'',    # \u0027 (apostrophe-quote, single quote mark)
      '\\\\' => '\\'     # \u005C (backslash)
    }
    ESCAPE_CHAR4 = /\\u([0-9A-Fa-f]{4,4})/                    # \uXXXX
    ESCAPE_CHAR8 = /\\U([0-9A-Fa-f]{8,8})/                    # \UXXXXXXXX
    ESCAPE_CHAR  = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/

    ##
    # Unicode regular expressions for Ruby 1.9+ with the Oniguruma engine.
    module Unicode
      if RUBY_VERSION >= '1.9'
        U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
                     [\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]|
                     [\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]|
                     [\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]|
                     [\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\u{10000}-\\u{EFFFF}]
                   EOS
        U_CHARS2 = Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]")
      end
    end

    ##
    # UTF-8 regular expressions for Ruby 1.8.x.
    module UTF_8
      if RUBY_VERSION < '1.9'
        U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
                     \\xC3[\\x80-\\x96]|                                (?# [\\u00C0-\\u00D6]|)
                     \\xC3[\\x98-\\xB6]|                                (?# [\\u00D8-\\u00F6]|)
                     \\xC3[\\xB8-\\xBF]|[\\xC4-\\xCB][\\x80-\\xBF]|     (?# [\\u00F8-\\u02FF]|)
                     \\xCD[\\xB0-\\xBD]|                                (?# [\\u0370-\\u037D]|)
                     \\xCD\\xBF|[\\xCE-\\xDF][\\x80-\\xBF]|             (?# [\\u037F-\\u1FFF]|)
                     \\xE0[\\xA0-\\xBF][\\x80-\\xBF]|                   (?# ...)
                     \\xE1[\\x80-\\xBF][\\x80-\\xBF]|                   (?# ...)
                     \\xE2\\x80[\\x8C-\\x8D]|                           (?# [\\u200C-\\u200D]|)
                     \\xE2\\x81[\\xB0-\\xBF]|                           (?# [\\u2070-\\u218F]|)
                     \\xE2[\\x82-\\x85][\\x80-\\xBF]|                   (?# ...)
                     \\xE2\\x86[\\x80-\\x8F]|                           (?# ...)
                     \\xE2[\\xB0-\\xBE][\\x80-\\xBF]|                   (?# [\\u2C00-\\u2FEF]|)
                     \\xE2\\xBF[\\x80-\\xAF]|                           (?# ...)
                     \\xE3\\x80[\\x81-\\xBF]|                           (?# [\\u3001-\\uD7FF]|)
                     \\xE3[\\x81-\\xBF][\\x80-\\xBF]|                   (?# ...)
                     [\\xE4-\\xEC][\\x80-\\xBF][\\x80-\\xBF]|           (?# ...)
                     \\xED[\\x80-\\x9F][\\x80-\\xBF]|                   (?# ...)
                     \\xEF[\\xA4-\\xB6][\\x80-\\xBF]|                   (?# [\\uF900-\\uFDCF]|)
                     \\xEF\\xB7[\\x80-\\x8F]|                           (?# ...)
                     \\xEF\\xB7[\\xB0-\\xBF]|                           (?# [\\uFDF0-\\uFFFD]|)
                     \\xEF[\\xB8-\\xBE][\\x80-\\xBF]|                   (?# ...)
                     \\xEF\\xBF[\\x80-\\xBD]|                           (?# ...)
                     \\xF0[\\x90-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|      (?# [\\u{10000}-\\u{EFFFF}])
                     [\\xF1-\\xF2][\\x80-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|
                     \\xF3[\\x80-\\xAF][\\x80-\\xBF][\\x80-\\xBF]       (?# ...)
                   EOS
        U_CHARS2 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
                     \\xC2\\xB7|                                        (?# \\u00B7|)
                     \\xCC[\\x80-\\xBF]|\\xCD[\\x80-\\xAF]|             (?# [\\u0300-\\u036F]|)
                     \\xE2\\x80\\xBF|\\xE2\\x81\\x80                    (?# [\\u203F-\\u2040])
                   EOS
      end
    end

    # Pull in whichever module actually defines U_CHARS1/U_CHARS2 for the
    # running interpreter.
    if RUBY_VERSION < '1.9'
      include UTF_8
    else
      include Unicode
    end

    # NOTE(review): KEYWORDS and FUNCTIONS are not defined in this file; they
    # are presumably arrays of strings supplied by an enclosing namespace --
    # confirm where they come from.
    KEYWORD              = /#{KEYWORDS.join('|')}|#{FUNCTIONS.join('|')}/i
    DELIMITER            = /\^\^|[{}()\[\],;\.]/
    OPERATOR             = /a|\|\||&&|!=|<=|>=|[!=<>+\-*\/]/
    COMMENT              = /#.*/

    PN_CHARS_BASE        = /[A-Z]|[a-z]|#{U_CHARS1}/          # \[95\]
    PN_CHARS_U           = /_|#{PN_CHARS_BASE}/               # \[96\]
    VARNAME              = /(?:[0-9]|#{PN_CHARS_U})
                            (?:[0-9]|#{PN_CHARS_U}|#{U_CHARS2})*/x # \[97\]
    PN_CHARS             = /-|[0-9]|#{PN_CHARS_U}|#{U_CHARS2}/ # \[98\]
    PN_CHARS_BODY        = /(?:(?:\.|#{PN_CHARS})*#{PN_CHARS})?/
    PN_PREFIX            = /#{PN_CHARS_BASE}#{PN_CHARS_BODY}/ # \[99\]
    PN_LOCAL             = /(?:[0-9]|#{PN_CHARS_U})#{PN_CHARS_BODY}/ # \[100\]

    IRI_REF              = /<([^<>"{}|^`\\\x00-\x20]*)>/      # \[70\]
    PNAME_NS             = /(#{PN_PREFIX}?):/                 # \[71\]
    PNAME_LN             = /#{PNAME_NS}(#{PN_LOCAL})/         # \[72\]
    BLANK_NODE_LABEL     = /_:(#{PN_LOCAL})/                  # \[73\]
    VAR1                 = /\?(#{VARNAME})/                   # \[74\]
    VAR2                 = /\$(#{VARNAME})/                   # \[75\]
    LANGTAG              = /@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)/   # \[76\]
    INTEGER              = /[0-9]+/                           # \[77\]
    DECIMAL              = /(?:[0-9]+\.[0-9]*|\.[0-9]+)/      # \[78\]
    EXPONENT             = /[eE][+-]?[0-9]+/                  # \[86\]
    DOUBLE               = /(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)#{EXPONENT}/ # \[79\]
    ECHAR                = /\\[tbnrf\\"']/                    # \[91\]
    STRING_LITERAL1      = /'((?:[^\x27\x5C\x0A\x0D]|#{ECHAR})*)'/ # \[87\]
    STRING_LITERAL2      = /"((?:[^\x22\x5C\x0A\x0D]|#{ECHAR})*)"/ # \[88\]
    STRING_LITERAL_LONG1 = /'''((?:(?:'|'')?(?:[^'\\]|#{ECHAR})+)*)'''/m # \[89\]
    STRING_LITERAL_LONG2 = /"""((?:(?:"|"")?(?:[^"\\]|#{ECHAR})+)*)"""/m # \[90\]
    WS                   = /\x20|\x09|\x0D|\x0A/              # \[93\]
    NIL                  = /\(#{WS}*\)/                       # \[92\]
    ANON                 = /\[#{WS}*\]/                       # \[94\]

    BooleanLiteral       = /true|false/                       # \[65\]
    # NOTE: this constant shadows the core String class inside Lexer, which
    # is why the rest of the class refers to `::String` explicitly.
    String               = /#{STRING_LITERAL_LONG1}|#{STRING_LITERAL_LONG2}|
                            #{STRING_LITERAL1}|#{STRING_LITERAL2}/x # \[66\]

    # Make all defined regular expression constants immutable:
    constants.each { |name| const_get(name).freeze }

    ##
    # Returns a copy of the given `input` string with all `\uXXXX` and
    # `\UXXXXXXXX` Unicode codepoint escape sequences replaced with their
    # unescaped UTF-8 character counterparts.
    #
    # @param  [String] input
    # @return [String]
    # @see    http://www.w3.org/TR/rdf-sparql-query/#codepointEscape
    def self.unescape_codepoints(input)
      string = input.dup
      # Work on raw bytes so that the packed replacement bytes can be spliced
      # in without encoding compatibility errors.
      string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding) # Ruby 1.9+

      # Decode \uXXXX and \UXXXXXXXX code points:
      string.gsub!(ESCAPE_CHAR) do
        s = [($1 || $2).hex].pack('U*')
        s.respond_to?(:force_encoding) ? s.force_encoding(Encoding::ASCII_8BIT) : s
      end

      string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding) # Ruby 1.9+
      string
    end

    ##
    # Returns a copy of the given `input` string with all string escape
    # sequences (e.g. `\n` and `\t`) replaced with their unescaped UTF-8
    # character counterparts.
    #
    # @param  [String] input
    # @return [String]
    # @see    http://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
    def self.unescape_string(input)
      input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] }
    end

    ##
    # Tokenizes the given `input` string or stream.
    #
    # @param  [String, #read, #to_s]   input
    # @param  [Hash{Symbol => Object}] options
    # @yield  [lexer]
    # @yieldparam [Lexer] lexer
    # @return [Lexer] the lexer, or the block's result when a block is given
    # @raise  [Lexer::Error] on invalid input
    def self.tokenize(input, options = {}, &block)
      lexer = self.new(input, options)
      block_given? ? block.call(lexer) : lexer
    end

    ##
    # Initializes a new lexer instance.
    #
    # @param  [String, #read, #to_s]   input
    # @param  [Hash{Symbol => Object}] options
    def initialize(input = nil, options = {})
      @options = options.dup
      self.input = input if input
    end

    ##
    # Any additional options for the lexer.
    #
    # @return [Hash]
    attr_reader   :options

    ##
    # The current input string being processed.
    #
    # Only the reader is generated here; the writer is defined explicitly
    # below, which avoids a "method redefined" warning.
    #
    # @return [String]
    attr_reader   :input

    ##
    # The current line number (zero-based).
    #
    # @return [Integer]
    attr_reader   :lineno

    ##
    # Sets the input to tokenize and resets the line counter.
    #
    # Strings are used as given, objects responding to `#read` (such as `IO`
    # or `StringIO` instances) are read in full, and anything else is
    # converted with `#to_s`. The text is copied, tagged as UTF-8 where the
    # interpreter supports encodings, and has its codepoint escapes decoded.
    #
    # @param  [String, #read, #to_s] input
    # @return [void]
    def input=(input)
      # Duck-type on #read rather than naming StringIO: this file never
      # requires 'stringio', so referencing that constant could raise
      # NameError for any non-String input.
      source = if input.is_a?(::String)
        input
      elsif input.respond_to?(:read)
        input.read
      else
        input.to_s
      end
      source = source.dup
      source.force_encoding(Encoding::UTF_8) if source.respond_to?(:force_encoding) # Ruby 1.9+
      @input  = self.class.unescape_codepoints(source)
      @lineno = 0
    end

    ##
    # Returns `true` if the input string is lexically valid.
    #
    # To be considered valid, the input string must contain more than zero
    # tokens, and must not contain any invalid tokens.
    #
    # @return [Boolean]
    def valid?
      !count.zero?
    rescue Error
      false
    end

    ##
    # Enumerates each token in the input string.
    #
    # White space and comments are skipped. Without a block nothing is
    # scanned; an enumerator is returned in either case.
    #
    # @yield  [token]
    # @yieldparam [Token] token
    # @return [Enumerator]
    # @raise  [Lexer::Error] if the input contains an unrecognizable token
    def each_token(&block)
      if block_given?
        @lineno  = 0
        @scanner = StringScanner.new(@input)
        begin
          until scanner.eos?
            next if skip_whitespace || skip_comment
            if token = match_token
              yield token
            else
              raise invalid_token_error
            end
          end
        ensure
          # Drop the scanner even when an error is raised or the caller's
          # block breaks out of the iteration.
          @scanner = nil
        end
      end
      enum_for(:each_token)
    end
    alias_method :each, :each_token

  protected

    # @return [StringScanner]
    attr_reader :scanner

    ##
    # Builds the error describing the unrecognizable text at the current
    # scanner position.
    #
    # @return [Lexer::Error]
    def invalid_token_error
      rest   = scanner.rest
      lexeme = begin
        rest.split(/#{WS}|#{COMMENT}/).first
      rescue StandardError
        nil # keep the original best-effort fallback to the raw remainder
      end
      lexeme ||= rest
      Error.new("invalid token #{lexeme.inspect} on line #{lineno + 1}",
        :input => input, :token => lexeme, :lineno => lineno)
    end

    # Skips one white space character, keeping track of the current line
    # number.
    #
    # @return [String, nil] the skipped character, if any
    # @see http://www.w3.org/TR/rdf-sparql-query/#whitespace
    def skip_whitespace
      matched = scanner.scan(WS)
      @lineno += matched.count("\n") if matched
      matched
    end

    # Skips a comment, i.e. the remainder of the current line.
    #
    # @return [Integer, nil] the number of characters skipped, if any
    # @see http://www.w3.org/TR/rdf-sparql-query/#grammarComments
    def skip_comment
      scanner.skip(COMMENT)
    end

    # Tries every token matcher in priority order.
    #
    # @return [Token, nil] the first token matched at the current position
    def match_token
      match_var1             ||
      match_var2             ||
      match_iri_ref          ||
      match_pname_ln         ||
      match_pname_ns         ||
      match_string_long_1    ||
      match_string_long_2    ||
      match_string_1         ||
      match_string_2         ||
      match_langtag          ||
      match_double           ||
      match_decimal          ||
      match_integer          ||
      match_boolean_literal  ||
      match_blank_node_label ||
      match_nil              ||
      match_anon             ||
      match_keyword          ||
      match_delimiter        ||
      match_operator
    end

    # Each matcher below returns a Token when its pattern matches at the
    # current scanner position (consuming the match) and nil otherwise.

    def match_var1
      token(:VAR1, scanner[1].to_s) if scanner.scan(VAR1)
    end

    def match_var2
      token(:VAR2, scanner[1].to_s) if scanner.scan(VAR2)
    end

    def match_iri_ref
      token(:IRI_REF, scanner[1].to_s) if scanner.scan(IRI_REF)
    end

    # The token value is a `[prefix, local]` pair; an empty prefix is
    # reported as nil.
    def match_pname_ln
      if scanner.scan(PNAME_LN)
        token(:PNAME_LN, [scanner[1].empty? ? nil : scanner[1].to_s, scanner[2].to_s])
      end
    end

    # The token value is the prefix; an empty prefix is reported as nil.
    def match_pname_ns
      if scanner.scan(PNAME_NS)
        token(:PNAME_NS, scanner[1].empty? ? nil : scanner[1].to_s)
      end
    end

    def match_string_long_1
      if scanner.scan(STRING_LITERAL_LONG1)
        token(:STRING_LITERAL_LONG1, self.class.unescape_string(scanner[1]))
      end
    end

    def match_string_long_2
      if scanner.scan(STRING_LITERAL_LONG2)
        token(:STRING_LITERAL_LONG2, self.class.unescape_string(scanner[1]))
      end
    end

    def match_string_1
      if scanner.scan(STRING_LITERAL1)
        token(:STRING_LITERAL1, self.class.unescape_string(scanner[1]))
      end
    end

    def match_string_2
      if scanner.scan(STRING_LITERAL2)
        token(:STRING_LITERAL2, self.class.unescape_string(scanner[1]))
      end
    end

    def match_langtag
      token(:LANGTAG, scanner[1].to_s) if scanner.scan(LANGTAG)
    end

    def match_double
      matched = scanner.scan(DOUBLE)
      token(:DOUBLE, matched) if matched
    end

    def match_decimal
      matched = scanner.scan(DECIMAL)
      token(:DECIMAL, matched) if matched
    end

    def match_integer
      matched = scanner.scan(INTEGER)
      token(:INTEGER, matched) if matched
    end

    def match_boolean_literal
      matched = scanner.scan(BooleanLiteral)
      token(:BooleanLiteral, matched) if matched
    end

    def match_blank_node_label
      token(:BLANK_NODE_LABEL, scanner[1].to_s) if scanner.scan(BLANK_NODE_LABEL)
    end

    def match_nil
      token(:NIL) if scanner.scan(NIL)
    end

    def match_anon
      token(:ANON) if scanner.scan(ANON)
    end

    # Keywords are untyped tokens whose value is the upper-cased keyword.
    def match_keyword
      matched = scanner.scan(KEYWORD)
      token(nil, matched.upcase.to_s) if matched
    end

    def match_delimiter
      matched = scanner.scan(DELIMITER)
      token(nil, matched.to_s) if matched
    end

    def match_operator
      matched = scanner.scan(OPERATOR)
      token(nil, matched.to_s) if matched
    end

    ##
    # Constructs a new token object annotated with the current line number.
    #
    # The parser relies on the type being a symbolized URI and the value being
    # a string, if there is no type. If there is a type, then the value takes
    # on the native representation appropriate for that type.
    #
    # @param  [Symbol] type
    # @param  [Object] value
    # @return [Token]
    def token(type, value = nil)
      Token.new(type, value, :lineno => lineno)
    end

    ##
    # Represents a lexer token.
    #
    # @example Creating a new token
    #   token = SPARQL::Grammar::Lexer::Token.new(:LANGTAG, "en")
    #   token.type   #=> :LANGTAG
    #   token.value  #=> "en"
    #
    # @see http://en.wikipedia.org/wiki/Lexical_analysis#Token
    class Token
      ##
      # Initializes a new token instance.
      #
      # The type is normalized to a Symbol (or nil); the value is stored
      # unchanged.
      #
      # @param  [Symbol]                 type
      # @param  [Object]                 value
      # @param  [Hash{Symbol => Object}] options
      # @option options [Integer]        :lineno (nil)
      def initialize(type, value = nil, options = {})
        @type, @value = (type ? type.to_s.to_sym : nil), value
        @options = options.dup
        @lineno  = @options.delete(:lineno)
      end

      ##
      # The token's symbol type.
      #
      # @return [Symbol]
      attr_reader :type

      ##
      # The token's value.
      #
      # @return [Object]
      attr_reader :value

      ##
      # The line number where the token was encountered.
      #
      # @return [Integer]
      attr_reader :lineno

      ##
      # Any additional options for the token.
      #
      # @return [Hash]
      attr_reader :options

      ##
      # Returns the attribute named by `key`.
      #
      # @param  [Symbol, Integer, #to_s] key `0`/`:type` or `1`/`:value`
      # @return [Object] the attribute, or nil for any other key
      def [](key)
        key = key.to_s.to_sym unless key.is_a?(Integer) || key.is_a?(Symbol)
        case key
          when 0, :type  then @type
          when 1, :value then @value
          else nil
        end
      end

      ##
      # Returns `true` if the given `value` matches either the type or value
      # of this token.
      #
      # @example Matching using the symbolic type
      #   SPARQL::Grammar::Lexer::Token.new(:NIL) === :NIL     #=> true
      #
      # @example Matching using the string value
      #   SPARQL::Grammar::Lexer::Token.new(nil, "{") === "{"  #=> true
      #
      # @param  [Symbol, String] value
      # @return [Boolean]
      def ===(value)
        case value
          when Symbol   then value == @type
          when ::String then value.to_s == @value.to_s
          else value == @value
        end
      end

      ##
      # Returns a hash table representation of this token.
      #
      # @return [Hash]
      def to_hash
        {:type => @type, :value => @value}
      end

      ##
      # Returns type, if not nil, otherwise value
      #
      # @return [Object]
      def representation
        @type ? @type : @value
      end

      ##
      # Returns an array representation of this token.
      #
      # @return [Array]
      def to_a
        [@type, @value]
      end

      ##
      # Returns a developer-friendly representation of this token.
      #
      # @return [String]
      def inspect
        to_hash.inspect
      end
    end # class Token

    ##
    # Raised for errors during lexical analysis.
    #
    # @example Raising a lexer error
    #   raise SPARQL::Grammar::Lexer::Error.new(
    #     "invalid token '%' on line 10",
    #     :input => query, :token => '%', :lineno => 9)
    #
    # @see http://ruby-doc.org/core/classes/StandardError.html
    class Error < StandardError
      ##
      # The input string associated with the error.
      #
      # @return [String]
      attr_reader :input

      ##
      # The invalid token which triggered the error.
      #
      # @return [String]
      attr_reader :token

      ##
      # The line number where the error occurred.
      #
      # @return [Integer]
      attr_reader :lineno

      ##
      # Initializes a new lexer error instance.
      #
      # @param  [String, #to_s]          message
      # @param  [Hash{Symbol => Object}] options
      # @option options [String]         :input  (nil)
      # @option options [String]         :token  (nil)
      # @option options [Integer]        :lineno (nil)
      def initialize(message, options = {})
        @input  = options[:input]
        @token  = options[:token]
        @lineno = options[:lineno]
        super(message.to_s)
      end
    end # class Error
  end # class Lexer
end; end # module SPARQL::Grammar