sparql 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,186 @@
1
+ require 'rdf/ll1/lexer'
2
+
3
+ module SPARQL::Grammar
4
+ module Terminals
5
+ # Definitions of token regular expressions used for lexical analysis
6
+
7
+ if RUBY_VERSION >= '1.9'
8
+ ##
9
+ # Unicode regular expressions for Ruby 1.9+ with the Oniguruma engine.
10
+ U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
11
+ [\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]|
12
+ [\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]|
13
+ [\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]|
14
+ [\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\u{10000}-\\u{EFFFF}]
15
+ EOS
16
+ U_CHARS2 = Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]")
17
+ IRI_RANGE = Regexp.compile("[[^<>\"{}|^`\\\\]&&[^\\x00-\\x20]]")
18
+ else
19
+ ##
20
+ # UTF-8 regular expressions for Ruby 1.8.x.
21
+ U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
22
+ \\xC3[\\x80-\\x96]| (?# [\\u00C0-\\u00D6]|)
23
+ \\xC3[\\x98-\\xB6]| (?# [\\u00D8-\\u00F6]|)
24
+ \\xC3[\\xB8-\\xBF]|[\\xC4-\\xCB][\\x80-\\xBF]| (?# [\\u00F8-\\u02FF]|)
25
+ \\xCD[\\xB0-\\xBD]| (?# [\\u0370-\\u037D]|)
26
+ \\xCD\\xBF|[\\xCE-\\xDF][\\x80-\\xBF]| (?# [\\u037F-\\u1FFF]|)
27
+ \\xE0[\\xA0-\\xBF][\\x80-\\xBF]| (?# ...)
28
+ \\xE1[\\x80-\\xBF][\\x80-\\xBF]| (?# ...)
29
+ \\xE2\\x80[\\x8C-\\x8D]| (?# [\\u200C-\\u200D]|)
30
+ \\xE2\\x81[\\xB0-\\xBF]| (?# [\\u2070-\\u218F]|)
31
+ \\xE2[\\x82-\\x85][\\x80-\\xBF]| (?# ...)
32
+ \\xE2\\x86[\\x80-\\x8F]| (?# ...)
33
+ \\xE2[\\xB0-\\xBE][\\x80-\\xBF]| (?# [\\u2C00-\\u2FEF]|)
34
+ \\xE2\\xBF[\\x80-\\xAF]| (?# ...)
35
+ \\xE3\\x80[\\x81-\\xBF]| (?# [\\u3001-\\uD7FF]|)
36
+ \\xE3[\\x81-\\xBF][\\x80-\\xBF]| (?# ...)
37
+ [\\xE4-\\xEC][\\x80-\\xBF][\\x80-\\xBF]| (?# ...)
38
+ \\xED[\\x80-\\x9F][\\x80-\\xBF]| (?# ...)
39
+ \\xEF[\\xA4-\\xB6][\\x80-\\xBF]| (?# [\\uF900-\\uFDCF]|)
40
+ \\xEF\\xB7[\\x80-\\x8F]| (?# ...)
41
+ \\xEF\\xB7[\\xB0-\\xBF]| (?# [\\uFDF0-\\uFFFD]|)
42
+ \\xEF[\\xB8-\\xBE][\\x80-\\xBF]| (?# ...)
43
+ \\xEF\\xBF[\\x80-\\xBD]| (?# ...)
44
+ \\xF0[\\x90-\\xBF][\\x80-\\xBF][\\x80-\\xBF]| (?# [\\u{10000}-\\u{EFFFF}])
45
+ [\\xF1-\\xF2][\\x80-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|
46
+ \\xF3[\\x80-\\xAF][\\x80-\\xBF][\\x80-\\xBF] (?# ...)
47
+ EOS
48
+ U_CHARS2 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
49
+ \\xC2\\xB7| (?# \\u00B7|)
50
+ \\xCC[\\x80-\\xBF]|\\xCD[\\x80-\\xAF]| (?# [\\u0300-\\u036F]|)
51
+ \\xE2\\x80\\xBF|\\xE2\\x81\\x80 (?# [\\u203F-\\u2040])
52
+ EOS
53
+ IRI_RANGE = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
54
+ \\x21| (?# ")
55
+ [\\x23-\\x3b]|\\x3d| (?# < & >)
56
+ [\\x3f-\\x5b]|\\x5d|\\x5f| (?# \ ^ `)
57
+ [\\x61-\\x7a]| (?# { } |)
58
+ [\\x7e-\\xff]
59
+ EOS
60
+ end
61
+
62
+ # 26
63
+ UCHAR = EBNF::LL1::Lexer::UCHAR
64
+ # 170s
65
+ PERCENT = /%[0-9A-Fa-f]{2}/
66
+ # 172s
67
+ PN_LOCAL_ESC = /\\[_~\.\-\!$\&'\(\)\*\+,;=:\/\?\#@%]/
68
+ # 169s
69
+ PLX = /#{PERCENT}|#{PN_LOCAL_ESC}/
70
+ # 153
71
+ PN_CHARS_BASE = /[A-Z]|[a-z]|#{U_CHARS1}/
72
+ # 154
73
+ PN_CHARS_U = /_|#{PN_CHARS_BASE}/
74
+ # 155
75
+ VARNAME = /(?:[0-9]|#{PN_CHARS_U})
76
+ (?:[0-9]|#{PN_CHARS_U}|#{U_CHARS2})*/x
77
+ # 156
78
+ PN_CHARS = /-|[0-9]|#{PN_CHARS_U}|#{U_CHARS2}/
79
+ PN_LOCAL_BODY = /(?:(?:\.|:|#{PN_CHARS}|#{PLX})*(?:#{PN_CHARS}|:|#{PLX}))?/
80
+ PN_CHARS_BODY = /(?:(?:\.|#{PN_CHARS})*#{PN_CHARS})?/
81
+ # 157
82
+ PN_PREFIX = /#{PN_CHARS_BASE}#{PN_CHARS_BODY}/
83
+ # 158
84
+ PN_LOCAL = /(?:[0-9]|:|#{PN_CHARS_U}|#{PLX})#{PN_LOCAL_BODY}/
85
+ # 144
86
+ EXPONENT = /[eE][+-]?[0-9]+/
87
+ # 149
88
+ ECHAR = /\\[tbnrf\\"']/
89
+ # 18
90
+ IRIREF = /<(?:#{IRI_RANGE}|#{UCHAR})*>/
91
+ # 129
92
+ PNAME_NS = /#{PN_PREFIX}?:/
93
+ # 130
94
+ PNAME_LN = /#{PNAME_NS}#{PN_LOCAL}/
95
+ # 131 - '.' may appear inside a label but not as its last character, so a trailing '.' stays a separate token
96
+ BLANK_NODE_LABEL = /_:((?:[0-9]|#{PN_CHARS_U})(?:(?:#{PN_CHARS}|\.)*#{PN_CHARS})?)/
97
+ # 132
98
+ VAR1 = /\?#{VARNAME}/
99
+ # 133
100
+ VAR2 = /\$#{VARNAME}/
101
+ # 134
102
+ LANGTAG = /@[a-zA-Z]+(?:-[a-zA-Z0-9]+)*/
103
+ # 135
104
+ INTEGER = /[0-9]+/
105
+ # 136
106
+ DECIMAL = /(?:[0-9]*\.[0-9]+)/
107
+ # 137
108
+ DOUBLE = /(?:[0-9]+\.[0-9]*#{EXPONENT}|\.?[0-9]+#{EXPONENT})/
109
+ # 138
110
+ INTEGER_POSITIVE = /(\+)([0-9]+)/
111
+ # 139
112
+ DECIMAL_POSITIVE = /(\+)([0-9]*\.[0-9]+)/
113
+ # 140
114
+ DOUBLE_POSITIVE = /(\+)([0-9]+\.[0-9]*#{EXPONENT}|\.?[0-9]+#{EXPONENT})/
115
+ # 141
116
+ INTEGER_NEGATIVE = /(\-)([0-9]+)/
117
+ # 142
118
+ DECIMAL_NEGATIVE = /(\-)([0-9]*\.[0-9]+)/
119
+ # 143
120
+ DOUBLE_NEGATIVE = /(\-)([0-9]+\.[0-9]*#{EXPONENT}|\.?[0-9]+#{EXPONENT})/
121
+ # 145
122
+ STRING_LITERAL1 = /'([^\'\\\n\r]|#{ECHAR}|#{UCHAR})*'/
123
+ # 146
124
+ STRING_LITERAL2 = /"([^\"\\\n\r]|#{ECHAR}|#{UCHAR})*"/
125
+ # 147
126
+ STRING_LITERAL_LONG1 = /'''((?:'|'')?(?:[^'\\]|#{ECHAR}|#{UCHAR}))*'''/m
127
+ # 148
128
+ STRING_LITERAL_LONG2 = /"""((?:"|"")?(?:[^"\\]|#{ECHAR}|#{UCHAR}))*"""/m
129
+
130
+ # 151 - one whitespace character (space, tab, CR or LF); no /x flag, so every space in the pattern is literal
131
+ WS = / |\t|\r|\n/
132
+ # 150
133
+ NIL = /\(#{WS}*\)/
134
+ # 152
135
+ ANON = /\[#{WS}*\]/
136
+
137
+ # String terminals, case insensitive
138
+ STR_EXPR = %r(ABS|ADD|ALL|ASC|ASK|AS|BASE|BINDINGS|BIND
139
+ |BNODE|BOUND|BY|CEIL|CLEAR|COALESCE|CONCAT
140
+ |CONSTRUCT|CONTAINS|COPY|COUNT|CREATE|DATATYPE|DAY
141
+ |DEFAULT|DELETE\sDATA|DELETE\sWHERE|DELETE
142
+ |DESCRIBE|DESC|DISTINCT|DROP|ENCODE_FOR_URI|EXISTS
143
+ |FILTER|FLOOR|FROM|GRAPH|GROUP_CONCAT|GROUP|HAVING
144
+ |HOURS|IF|INSERT\sDATA|INSERT|INTO|IN|IRI
145
+ |LANGMATCHES|LANGTAG|LANG|LCASE|LIMIT|LOAD
146
+ |MAX|MD5|MINUS|MINUTES|MIN|MONTH|MOVE
147
+ |NAMED|NOT|NOW|OFFSET|OPTIONAL
148
+ |ORDER|PREFIX|RAND|REDUCED|REGEX|ROUND|SAMPLE|SECONDS
149
+ |SELECT|SEPARATOR|SERVICE
150
+ |SHA1|SHA224|SHA256|SHA384|SHA512
151
+ |STRDT|STRENDS|STRLANG|STRLEN|STRSTARTS|SUBSTR|STR|SUM
152
+ |TIMEZONE|TO|TZ|UCASE|UNDEF|UNION|URI|USING
153
+ |WHERE|WITH|YEAR
154
+ |isBLANK|isIRI|isURI|isLITERAL|isNUMERIC|sameTerm
155
+ |true
156
+ |false
157
+ |&&|!=|!|<=|>=|\^\^|\|\||[\(\),.;\[\]\{\}\+\-=<>\?\^\|\*\/a]
158
+ )xi
159
+
160
+ # Map terminals to canonical form
161
+ STR_MAP = (%w{ABS ADD ALL ASC ASK AS BASE BINDINGS BIND
162
+ BNODE BOUND BY CEIL CLEAR COALESCE CONCAT
163
+ CONSTRUCT CONTAINS COPY COUNT CREATE DATATYPE DAY
164
+ DEFAULT DELETE
165
+ DESCRIBE DESC DISTINCT DROP ENCODE_FOR_URI EXISTS
166
+ FILTER FLOOR FROM GRAPH GROUP_CONCAT GROUP HAVING
167
+ HOURS IF INSERT INTO IN IRI
168
+ LANGMATCHES LANGTAG LANG LCASE LIMIT LOAD
169
+ MAX MD5 MINUS MINUTES MIN MONTH MOVE
170
+ NAMED NOT NOW OFFSET OPTIONAL
171
+ ORDER PREFIX RAND REDUCED REGEX ROUND SAMPLE SECONDS
172
+ SELECT SEPARATOR SERVICE
173
+ SHA1 SHA224 SHA256 SHA384 SHA512
174
+ STRDT STRENDS STRLANG STRLEN STRSTARTS SUBSTR STR SUM
175
+ TIMEZONE TO TZ UCASE UNDEF UNION URI USING
176
+ WHERE WITH YEAR
177
+ isBLANK isIRI isURI isLITERAL isNUMERIC sameTerm
178
+ true
179
+ false
180
+ } + [
181
+ "DELETE DATA",
182
+ "DELETE WHERE",
183
+ "INSERT DATA",
184
+ ]).inject({}) {|memo, t| memo[t.downcase] = t; memo}.freeze
185
+ end
186
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sparql
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2013-02-08 00:00:00.000000000 Z
14
+ date: 2013-03-05 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: rdf
@@ -29,6 +29,22 @@ dependencies:
29
29
  - - ! '>='
30
30
  - !ruby/object:Gem::Version
31
31
  version: 1.0.1
32
+ - !ruby/object:Gem::Dependency
33
+ name: ebnf
34
+ requirement: !ruby/object:Gem::Requirement
35
+ none: false
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: 0.1.1
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 0.1.1
32
48
  - !ruby/object:Gem::Dependency
33
49
  name: builder
34
50
  requirement: !ruby/object:Gem::Requirement
@@ -295,7 +311,7 @@ extra_rdoc_files: []
295
311
  files:
296
312
  - AUTHORS
297
313
  - CREDITS
298
- - README.markdown
314
+ - README.md
299
315
  - UNLICENSE
300
316
  - VERSION
301
317
  - bin/sparql
@@ -358,9 +374,9 @@ files:
358
374
  - lib/sparql/algebra/version.rb
359
375
  - lib/sparql/algebra.rb
360
376
  - lib/sparql/extensions.rb
361
- - lib/sparql/grammar/lexer.rb
362
- - lib/sparql/grammar/parser/meta.rb
363
- - lib/sparql/grammar/parser.rb
377
+ - lib/sparql/grammar/meta.rb
378
+ - lib/sparql/grammar/parser11.rb
379
+ - lib/sparql/grammar/terminals11.rb
364
380
  - lib/sparql/grammar.rb
365
381
  - lib/sparql/results.rb
366
382
  - lib/sparql/version.rb
@@ -1,613 +0,0 @@
1
- require 'strscan' unless defined?(StringScanner)
2
- require 'bigdecimal' unless defined?(BigDecimal)
3
-
4
- module SPARQL; module Grammar
5
- ##
6
- # A lexical analyzer for the SPARQL 1.0 grammar.
7
- #
8
- # Note that productions \[80\]-\[85\] have been incorporated directly into
9
- # \[77\], \[78\], \[79\].
10
- #
11
- # @example Tokenizing a SPARQL query string
12
- # query = "SELECT * WHERE { ?s ?p ?o }"
13
- # lexer = SPARQL::Grammar::Lexer.tokenize(query)
14
- # lexer.each_token do |token|
15
- # puts token.inspect
16
- # end
17
- #
18
- # @example Handling error conditions
19
- # begin
20
- # SPARQL::Grammar::Lexer.tokenize(query)
21
- # rescue SPARQL::Grammar::Lexer::Error => error
22
- # warn error.inspect
23
- # end
24
- #
25
- # @see http://www.w3.org/TR/rdf-sparql-query/#grammar
26
- # @see http://en.wikipedia.org/wiki/Lexical_analysis
27
- class Lexer
28
- include Enumerable
29
-
30
- ESCAPE_CHARS = {
31
- '\t' => "\t", # \u0009 (tab)
32
- '\n' => "\n", # \u000A (line feed)
33
- '\r' => "\r", # \u000D (carriage return)
34
- '\b' => "\b", # \u0008 (backspace)
35
- '\f' => "\f", # \u000C (form feed)
36
- '\\"' => '"', # \u0022 (quotation mark, double quote mark)
37
- '\\\'' => '\'', # \u0027 (apostrophe-quote, single quote mark)
38
- '\\\\' => '\\' # \u005C (backslash)
39
- }
40
- ESCAPE_CHAR4 = /\\u([0-9A-Fa-f]{4,4})/ # \uXXXX
41
- ESCAPE_CHAR8 = /\\U([0-9A-Fa-f]{8,8})/ # \UXXXXXXXX
42
- ESCAPE_CHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/
43
-
44
- ##
45
- # Unicode regular expressions for Ruby 1.9+ with the Oniguruma engine.
46
- module Unicode
47
- if RUBY_VERSION >= '1.9'
48
- U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
49
- [\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]|
50
- [\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]|
51
- [\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]|
52
- [\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\u{10000}-\\u{EFFFF}]
53
- EOS
54
- U_CHARS2 = Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]")
55
- end
56
- end
57
-
58
- ##
59
- # UTF-8 regular expressions for Ruby 1.8.x.
60
- module UTF_8
61
- if RUBY_VERSION < '1.9'
62
- U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
63
- \\xC3[\\x80-\\x96]| (?# [\\u00C0-\\u00D6]|)
64
- \\xC3[\\x98-\\xB6]| (?# [\\u00D8-\\u00F6]|)
65
- \\xC3[\\xB8-\\xBF]|[\\xC4-\\xCB][\\x80-\\xBF]| (?# [\\u00F8-\\u02FF]|)
66
- \\xCD[\\xB0-\\xBD]| (?# [\\u0370-\\u037D]|)
67
- \\xCD\\xBF|[\\xCE-\\xDF][\\x80-\\xBF]| (?# [\\u037F-\\u1FFF]|)
68
- \\xE0[\\xA0-\\xBF][\\x80-\\xBF]| (?# ...)
69
- \\xE1[\\x80-\\xBF][\\x80-\\xBF]| (?# ...)
70
- \\xE2\\x80[\\x8C-\\x8D]| (?# [\\u200C-\\u200D]|)
71
- \\xE2\\x81[\\xB0-\\xBF]| (?# [\\u2070-\\u218F]|)
72
- \\xE2[\\x82-\\x85][\\x80-\\xBF]| (?# ...)
73
- \\xE2\\x86[\\x80-\\x8F]| (?# ...)
74
- \\xE2[\\xB0-\\xBE][\\x80-\\xBF]| (?# [\\u2C00-\\u2FEF]|)
75
- \\xE2\\xBF[\\x80-\\xAF]| (?# ...)
76
- \\xE3\\x80[\\x81-\\xBF]| (?# [\\u3001-\\uD7FF]|)
77
- \\xE3[\\x81-\\xBF][\\x80-\\xBF]| (?# ...)
78
- [\\xE4-\\xEC][\\x80-\\xBF][\\x80-\\xBF]| (?# ...)
79
- \\xED[\\x80-\\x9F][\\x80-\\xBF]| (?# ...)
80
- \\xEF[\\xA4-\\xB6][\\x80-\\xBF]| (?# [\\uF900-\\uFDCF]|)
81
- \\xEF\\xB7[\\x80-\\x8F]| (?# ...)
82
- \\xEF\\xB7[\\xB0-\\xBF]| (?# [\\uFDF0-\\uFFFD]|)
83
- \\xEF[\\xB8-\\xBE][\\x80-\\xBF]| (?# ...)
84
- \\xEF\\xBF[\\x80-\\xBD]| (?# ...)
85
- \\xF0[\\x90-\\xBF][\\x80-\\xBF][\\x80-\\xBF]| (?# [\\u{10000}-\\u{EFFFF}])
86
- [\\xF1-\\xF2][\\x80-\\xBF][\\x80-\\xBF][\\x80-\\xBF]|
87
- \\xF3[\\x80-\\xAF][\\x80-\\xBF][\\x80-\\xBF] (?# ...)
88
- EOS
89
- U_CHARS2 = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
90
- \\xC2\\xB7| (?# \\u00B7|)
91
- \\xCC[\\x80-\\xBF]|\\xCD[\\x80-\\xAF]| (?# [\\u0300-\\u036F]|)
92
- \\xE2\\x80\\xBF|\\xE2\\x81\\x80 (?# [\\u203F-\\u2040])
93
- EOS
94
- end
95
- end
96
-
97
- if RUBY_VERSION < '1.9'
98
- include UTF_8
99
- else
100
- include Unicode
101
- end
102
-
103
- KEYWORD = /#{KEYWORDS.join('|')}|#{FUNCTIONS.join('|')}/i
104
- DELIMITER = /\^\^|[{}()\[\],;\.]/
105
- OPERATOR = /a|\|\||&&|!=|<=|>=|[!=<>+\-*\/]/
106
- COMMENT = /#.*/
107
-
108
- PN_CHARS_BASE = /[A-Z]|[a-z]|#{U_CHARS1}/ # \[95\]
109
- PN_CHARS_U = /_|#{PN_CHARS_BASE}/ # \[96\]
110
- VARNAME = /(?:[0-9]|#{PN_CHARS_U})
111
- (?:[0-9]|#{PN_CHARS_U}|#{U_CHARS2})*/x # \[97\]
112
- PN_CHARS = /-|[0-9]|#{PN_CHARS_U}|#{U_CHARS2}/ # \[98\]
113
- PN_CHARS_BODY = /(?:(?:\.|#{PN_CHARS})*#{PN_CHARS})?/
114
- PN_PREFIX = /#{PN_CHARS_BASE}#{PN_CHARS_BODY}/ # \[99\]
115
- PN_LOCAL = /(?:[0-9]|#{PN_CHARS_U})#{PN_CHARS_BODY}/ # \[100\]
116
-
117
- IRI_REF = /<([^<>"{}|^`\\\x00-\x20]*)>/ # \[70\]
118
- PNAME_NS = /(#{PN_PREFIX}?):/ # \[71\]
119
- PNAME_LN = /#{PNAME_NS}(#{PN_LOCAL})/ # \[72\]
120
- BLANK_NODE_LABEL = /_:(#{PN_LOCAL})/ # \[73\]
121
- VAR1 = /\?(#{VARNAME})/ # \[74\]
122
- VAR2 = /\$(#{VARNAME})/ # \[75\]
123
- LANGTAG = /@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)/ # \[76\]
124
- INTEGER = /[0-9]+/ # \[77\]
125
- DECIMAL = /(?:[0-9]+\.[0-9]*|\.[0-9]+)/ # \[78\]
126
- EXPONENT = /[eE][+-]?[0-9]+/ # \[86\]
127
- DOUBLE = /(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)#{EXPONENT}/ # \[79\]
128
- ECHAR = /\\[tbnrf\\"']/ # \[91\]
129
- STRING_LITERAL1 = /'((?:[^\x27\x5C\x0A\x0D]|#{ECHAR})*)'/ # \[87\]
130
- STRING_LITERAL2 = /"((?:[^\x22\x5C\x0A\x0D]|#{ECHAR})*)"/ # \[88\]
131
- STRING_LITERAL_LONG1 = /'''((?:(?:'|'')?(?:[^'\\]|#{ECHAR})+)*)'''/m # \[89\]
132
- STRING_LITERAL_LONG2 = /"""((?:(?:"|"")?(?:[^"\\]|#{ECHAR})+)*)"""/m # \[90\]
133
- WS = /\x20|\x09|\x0D|\x0A/ # \[93\]
134
- NIL = /\(#{WS}*\)/ # \[92\]
135
- ANON = /\[#{WS}*\]/ # \[94\]
136
-
137
- BooleanLiteral = /true|false/ # \[65\]
138
- String = /#{STRING_LITERAL_LONG1}|#{STRING_LITERAL_LONG2}|
139
- #{STRING_LITERAL1}|#{STRING_LITERAL2}/x # \[66\]
140
-
141
- # Make all defined regular expression constants immutable:
142
- constants.each { |name| const_get(name).freeze }
143
-
144
- ##
145
- # Returns a copy of the given `input` string with all `\uXXXX` and
146
- # `\UXXXXXXXX` Unicode codepoint escape sequences replaced with their
147
- # unescaped UTF-8 character counterparts.
148
- #
149
- # @param [String] input
150
- # @return [String]
151
- # @see http://www.w3.org/TR/rdf-sparql-query/#codepointEscape
152
- def self.unescape_codepoints(input)
153
- string = input.dup
154
- string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding) # Ruby 1.9+
155
-
156
- # Decode \uXXXX and \UXXXXXXXX code points:
157
- string.gsub!(ESCAPE_CHAR) do
158
- s = [($1 || $2).hex].pack('U*')
159
- s.respond_to?(:force_encoding) ? s.force_encoding(Encoding::ASCII_8BIT) : s
160
- end
161
-
162
- string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding) # Ruby 1.9+
163
- string
164
- end
165
-
166
- ##
167
- # Returns a copy of the given `input` string with all string escape
168
- # sequences (e.g. `\n` and `\t`) replaced with their unescaped UTF-8
169
- # character counterparts.
170
- #
171
- # @param [String] input
172
- # @return [String]
173
- # @see http://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
174
- def self.unescape_string(input)
175
- input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] }
176
- end
177
-
178
- ##
179
- # Tokenizes the given `input` string or stream.
180
- #
181
- # @param [String, #to_s] input
182
- # @param [Hash{Symbol => Object}] options
183
- # @yield [lexer]
184
- # @yieldparam [Lexer] lexer
185
- # @return [Lexer]
186
- # @raise [Lexer::Error] on invalid input
187
- def self.tokenize(input, options = {}, &block)
188
- lexer = self.new(input, options)
189
- block_given? ? block.call(lexer) : lexer
190
- end
191
-
192
- ##
193
- # Initializes a new lexer instance.
194
- #
195
- # @param [String, #to_s] input
196
- # @param [Hash{Symbol => Object}] options
197
- def initialize(input = nil, options = {})
198
- @options = options.dup
199
- self.input = input if input
200
- end
201
-
202
- ##
203
- # Any additional options for the lexer.
204
- #
205
- # @return [Hash]
206
- attr_reader :options
207
-
208
- ##
209
- # The current input string being processed.
210
- #
211
- # @return [String]
212
- attr_accessor :input
213
-
214
- ##
215
- # The current line number (zero-based).
216
- #
217
- # @return [Integer]
218
- attr_reader :lineno
219
-
220
- ##
221
- # @param [String, #to_s] input
222
- # @return [void]
223
- def input=(input)
224
- @input = case input
225
- when ::String then input
226
- when IO, StringIO then input.read
227
- else input.to_s
228
- end
229
- @input = @input.dup
230
- @input.force_encoding(Encoding::UTF_8) if @input.respond_to?(:force_encoding) # Ruby 1.9+
231
- @input = self.class.unescape_codepoints(@input)
232
- @lineno = 0
233
- end
234
-
235
- ##
236
- # Returns `true` if the input string is lexically valid.
237
- #
238
- # To be considered valid, the input string must contain more than zero
239
- # tokens, and must not contain any invalid tokens.
240
- #
241
- # @return [Boolean]
242
- def valid?
243
- begin
244
- !count.zero?
245
- rescue Error
246
- false
247
- end
248
- end
249
-
250
- ##
251
- # Enumerates each token in the input string.
252
- #
253
- # @yield [token]
254
- # @yieldparam [Token] token
255
- # @return [Enumerator]
256
- def each_token(&block)
257
- if block_given?
258
- @lineno = 0
259
- @scanner = StringScanner.new(@input)
260
- until scanner.eos?
261
- case
262
- when skip_whitespace
263
- when skip_comment
264
- when token = match_token
265
- yield token
266
- else
267
- lexeme = (@scanner.rest.split(/#{WS}|#{COMMENT}/).first rescue nil) || @scanner.rest
268
- raise Error.new("invalid token #{lexeme.inspect} on line #{lineno + 1}",
269
- :input => input, :token => lexeme, :lineno => lineno)
270
- end
271
- end
272
- @scanner = nil
273
- end
274
- enum_for(:each_token)
275
- end
276
- alias_method :each, :each_token
277
-
278
- protected
279
-
280
- # @return [StringScanner]
281
- attr_reader :scanner
282
-
283
- # @see http://www.w3.org/TR/rdf-sparql-query/#whitespace
284
- def skip_whitespace
285
- # skip all white space, but keep track of the current line number
286
- if matched = scanner.scan(WS)
287
- @lineno += matched.count("\n")
288
- matched
289
- end
290
- end
291
-
292
- # @see http://www.w3.org/TR/rdf-sparql-query/#grammarComments
293
- def skip_comment
294
- # skip the remainder of the current line
295
- skipped = scanner.skip(COMMENT)
296
- end
297
-
298
- def match_token
299
- match_var1 ||
300
- match_var2 ||
301
- match_iri_ref ||
302
- match_pname_ln ||
303
- match_pname_ns ||
304
- match_string_long_1 ||
305
- match_string_long_2 ||
306
- match_string_1 ||
307
- match_string_2 ||
308
- match_langtag ||
309
- match_double ||
310
- match_decimal ||
311
- match_integer ||
312
- match_boolean_literal ||
313
- match_blank_node_label||
314
- match_nil ||
315
- match_anon ||
316
- match_keyword ||
317
- match_delimiter ||
318
- match_operator
319
- end
320
-
321
- def match_var1
322
- if matched = scanner.scan(VAR1)
323
- token(:VAR1, scanner[1].to_s)
324
- end
325
- end
326
-
327
- def match_var2
328
- if matched = scanner.scan(VAR2)
329
- token(:VAR2, scanner[1].to_s)
330
- end
331
- end
332
-
333
- def match_iri_ref
334
- if matched = scanner.scan(IRI_REF)
335
- token(:IRI_REF, scanner[1].to_s)
336
- end
337
- end
338
-
339
- def match_pname_ln
340
- if matched = scanner.scan(PNAME_LN)
341
- token(:PNAME_LN, [scanner[1].empty? ? nil : scanner[1].to_s, scanner[2].to_s])
342
- end
343
- end
344
-
345
- def match_pname_ns
346
- if matched = scanner.scan(PNAME_NS)
347
- token(:PNAME_NS, scanner[1].empty? ? nil : scanner[1].to_s)
348
- end
349
- end
350
-
351
- def match_string_long_1
352
- if matched = scanner.scan(STRING_LITERAL_LONG1)
353
- token(:STRING_LITERAL_LONG1, self.class.unescape_string(scanner[1]))
354
- end
355
- end
356
-
357
- def match_string_long_2
358
- if matched = scanner.scan(STRING_LITERAL_LONG2)
359
- token(:STRING_LITERAL_LONG2, self.class.unescape_string(scanner[1]))
360
- end
361
- end
362
-
363
- def match_string_1
364
- if matched = scanner.scan(STRING_LITERAL1)
365
- token(:STRING_LITERAL1, self.class.unescape_string(scanner[1]))
366
- end
367
- end
368
-
369
- def match_string_2
370
- if matched = scanner.scan(STRING_LITERAL2)
371
- token(:STRING_LITERAL2, self.class.unescape_string(scanner[1]))
372
- end
373
- end
374
-
375
- def match_langtag
376
- if matched = scanner.scan(LANGTAG)
377
- token(:LANGTAG, scanner[1].to_s)
378
- end
379
- end
380
-
381
- def match_double
382
- if matched = scanner.scan(DOUBLE)
383
- token(:DOUBLE, matched)
384
- end
385
- end
386
-
387
- def match_decimal
388
- if matched = scanner.scan(DECIMAL)
389
- token(:DECIMAL, matched)
390
- end
391
- end
392
-
393
- def match_integer
394
- if matched = scanner.scan(INTEGER)
395
- token(:INTEGER, matched)
396
- end
397
- end
398
-
399
- def match_boolean_literal
400
- if matched = scanner.scan(BooleanLiteral)
401
- token(:BooleanLiteral, matched)
402
- end
403
- end
404
-
405
- def match_blank_node_label
406
- if matched = scanner.scan(BLANK_NODE_LABEL)
407
- token(:BLANK_NODE_LABEL, scanner[1].to_s)
408
- end
409
- end
410
-
411
- def match_nil
412
- if matched = scanner.scan(NIL)
413
- token(:NIL)
414
- end
415
- end
416
-
417
- def match_anon
418
- if matched = scanner.scan(ANON)
419
- token(:ANON)
420
- end
421
- end
422
-
423
- def match_keyword
424
- if matched = scanner.scan(KEYWORD)
425
- token(nil, matched.upcase.to_s)
426
- end
427
- end
428
-
429
- def match_delimiter
430
- if matched = scanner.scan(DELIMITER)
431
- token(nil, matched.to_s)
432
- end
433
- end
434
-
435
- def match_operator
436
- if matched = scanner.scan(OPERATOR)
437
- token(nil, matched.to_s)
438
- end
439
- end
440
-
441
- protected
442
-
443
- ##
444
- # Constructs a new token object annotated with the current line number.
445
- #
446
- # The parser relies on the type being a symbolized URI and the value being
447
- # a string, if there is no type. If there is a type, then the value takes
448
- # on the native representation appropriate for that type.
449
- #
450
- # @param [Symbol] type
451
- # @param [Object] value
452
- # @return [Token]
453
- def token(type, value = nil)
454
- Token.new(type, value, :lineno => lineno)
455
- end
456
-
457
- ##
458
- # Represents a lexer token.
459
- #
460
- # @example Creating a new token
461
- # token = SPARQL::Grammar::Lexer::Token.new(:LANGTAG, "en")
462
- # token.type #=> :LANGTAG
463
- # token.value #=> "en"
464
- #
465
- # @see http://en.wikipedia.org/wiki/Lexical_analysis#Token
466
- class Token
467
- ##
468
- # Initializes a new token instance.
469
- #
470
- # @param [Symbol] type
471
- # @param [Object] value
472
- # @param [Hash{Symbol => Object}] options
473
- # @option options [Integer] :lineno (nil)
474
- def initialize(type, value = nil, options = {})
475
- @type, @value = (type ? type.to_s.to_sym : nil), value
476
- @options = options.dup
477
- @lineno = @options.delete(:lineno)
478
- end
479
-
480
- ##
481
- # The token's symbol type.
482
- #
483
- # @return [Symbol]
484
- attr_reader :type
485
-
486
- ##
487
- # The token's value.
488
- #
489
- # @return [Object]
490
- attr_reader :value
491
-
492
- ##
493
- # The line number where the token was encountered.
494
- #
495
- # @return [Integer]
496
- attr_reader :lineno
497
-
498
- ##
499
- # Any additional options for the token.
500
- #
501
- # @return [Hash]
502
- attr_reader :options
503
-
504
- ##
505
- # Returns the attribute named by `key`.
506
- #
507
- # @param [Symbol] key
508
- # @return [Object]
509
- def [](key)
510
- key = key.to_s.to_sym unless key.is_a?(Integer) || key.is_a?(Symbol)
511
- case key
512
- when 0, :type then @type
513
- when 1, :value then @value
514
- else nil
515
- end
516
- end
517
-
518
- ##
519
- # Returns `true` if the given `value` matches either the type or value
520
- # of this token.
521
- #
522
- # @example Matching using the symbolic type
523
- # SPARQL::Grammar::Lexer::Token.new(:NIL) === :NIL #=> true
524
- #
525
- # @example Matching using the string value
526
- # SPARQL::Grammar::Lexer::Token.new(nil, "{") === "{" #=> true
527
- #
528
- # @param [Symbol, String] value
529
- # @return [Boolean]
530
- def ===(value)
531
- case value
532
- when Symbol then value == @type
533
- when ::String then value.to_s == @value.to_s
534
- else value == @value
535
- end
536
- end
537
-
538
- ##
539
- # Returns a hash table representation of this token.
540
- #
541
- # @return [Hash]
542
- def to_hash
543
- {:type => @type, :value => @value}
544
- end
545
-
546
- ##
547
- # Returns type, if not nil, otherwise value
548
- def representation
549
- @type ? @type : @value
550
- end
551
-
552
- ##
553
- # Returns an array representation of this token.
554
- #
555
- # @return [Array]
556
- def to_a
557
- [@type, @value]
558
- end
559
-
560
- ##
561
- # Returns a developer-friendly representation of this token.
562
- #
563
- # @return [String]
564
- def inspect
565
- to_hash.inspect
566
- end
567
- end # class Token
568
-
569
- ##
570
- # Raised for errors during lexical analysis.
571
- #
572
- # @example Raising a lexer error
573
- # raise SPARQL::Grammar::Lexer::Error.new(
574
- # "invalid token '%' on line 10",
575
- # :input => query, :token => '%', :lineno => 9)
576
- #
577
- # @see http://ruby-doc.org/core/classes/StandardError.html
578
- class Error < StandardError
579
- ##
580
- # The input string associated with the error.
581
- #
582
- # @return [String]
583
- attr_reader :input
584
-
585
- ##
586
- # The invalid token which triggered the error.
587
- #
588
- # @return [String]
589
- attr_reader :token
590
-
591
- ##
592
- # The line number where the error occurred.
593
- #
594
- # @return [Integer]
595
- attr_reader :lineno
596
-
597
- ##
598
- # Initializes a new lexer error instance.
599
- #
600
- # @param [String, #to_s] message
601
- # @param [Hash{Symbol => Object}] options
602
- # @option options [String] :input (nil)
603
- # @option options [String] :token (nil)
604
- # @option options [Integer] :lineno (nil)
605
- def initialize(message, options = {})
606
- @input = options[:input]
607
- @token = options[:token]
608
- @lineno = options[:lineno]
609
- super(message.to_s)
610
- end
611
- end # class Error
612
- end # class Lexer
613
- end; end # module SPARQL::Grammar