ebnf 1.1.1 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +218 -196
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +40 -21
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +13 -12
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +171 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +7 -2
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +128 -87
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +140 -8
  44. data/lib/ebnf/ll1/lexer.rb +37 -32
  45. data/lib/ebnf/ll1/parser.rb +113 -73
  46. data/lib/ebnf/ll1/scanner.rb +84 -51
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +554 -0
  51. data/lib/ebnf/peg/rule.rb +241 -0
  52. data/lib/ebnf/rule.rb +453 -163
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +554 -85
  55. metadata +98 -20
  56. data/etc/sparql.rb +0 -45773
data/UNLICENSE CHANGED
@@ -21,4 +21,4 @@ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
21
  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
22
  OTHER DEALINGS IN THE SOFTWARE.
23
23
 
24
- For more information, please refer to <http://unlicense.org/>
24
+ For more information, please refer to <https://unlicense.org/>
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.1.1
1
+ 2.1.0
data/bin/ebnf CHANGED
@@ -15,6 +15,7 @@ options = {
15
15
  output_format: :sxp,
16
16
  prefix: "ttl",
17
17
  namespace: "http://www.w3.org/ns/formats/Turtle#",
18
+ level: 4
18
19
  }
19
20
 
20
21
  input, out = nil, STDOUT
@@ -23,14 +24,17 @@ OPT_ARGS = [
23
24
  ["--debug", GetoptLong::NO_ARGUMENT, "Turn on debugging output"],
24
25
  ["--bnf", GetoptLong::NO_ARGUMENT, "Transform EBNF to BNF"],
25
26
  ["--evaluate","-e", GetoptLong::REQUIRED_ARGUMENT,"Evaluate argument as an EBNF document"],
27
+ ["--format", "-f", GetoptLong::REQUIRED_ARGUMENT,"Specify output format one of abnf, abnfh, ebnf, html, isoebnf, isoebnfh, ttl, sxp, or rb"],
28
+ ["--input-format", GetoptLong::REQUIRED_ARGUMENT,"Specify input format one of abnf, ebnf, isoebnf, native, or sxp"],
26
29
  ["--ll1", GetoptLong::REQUIRED_ARGUMENT,"Generate First/Follow rules, argument is start symbol"],
27
- ["--format", "-f", GetoptLong::REQUIRED_ARGUMENT,"Specify output format one of ebnf, html, ttl, sxp, or rb"],
28
- ["--input-format", GetoptLong::REQUIRED_ARGUMENT,"Specify input format one of ebnf or sxp"],
29
30
  ["--mod-name", GetoptLong::REQUIRED_ARGUMENT,"Module name used when creating ruby tables"],
31
+ ["--namespace", "-n", GetoptLong::REQUIRED_ARGUMENT,"Namespace to use when generating Turtle"],
30
32
  ["--output", "-o", GetoptLong::REQUIRED_ARGUMENT,"Output to the specified file path"],
33
+ ["--peg", GetoptLong::NO_ARGUMENT, "Transform EBNF to PEG"],
31
34
  ["--prefix", "-p", GetoptLong::REQUIRED_ARGUMENT,"Prefix to use when generating Turtle"],
32
35
  ["--progress", "-v", GetoptLong::NO_ARGUMENT, "Detail on execution"],
33
- ["--namespace", "-n", GetoptLong::REQUIRED_ARGUMENT,"Namespace to use when generating Turtle"],
36
+ ["--renumber", GetoptLong::NO_ARGUMENT, "Renumber parsed rules"],
37
+ ["--validate", GetoptLong::NO_ARGUMENT, "Validate grammar"],
34
38
  ["--help", "-?", GetoptLong::NO_ARGUMENT, "This message"]
35
39
  ]
36
40
  def usage
@@ -53,42 +57,57 @@ opts = GetoptLong.new(*OPT_ARGS.map {|o| o[0..-2]})
53
57
 
54
58
  opts.each do |opt, arg|
55
59
  case opt
56
- when '--debug' then options[:debug] = true
60
+ when '--debug' then options[:level] = 0
57
61
  when '--bnf' then options[:bnf] = true
58
62
  when '--evaluate' then input = arg
59
- when '--input-format' then options[:format] = arg.to_sym
60
- when '--format' then options[:output_format] = arg.to_sym
61
- when '--ll1' then (options[:ll1] ||= []) <<arg.to_sym
63
+ when '--input-format'
64
+ unless %w(abnf ebnf isoebnf native sxp).include?(arg)
65
+ STDERR.puts("unrecognized input format #{arg}")
66
+ usage
67
+ end
68
+ options[:format] = arg.to_sym
69
+ when '--format'
70
+ unless %w(abnf abnfh ebnf html isoebnf isoebnfh rb sxp ttl).include?(arg)
71
+ STDERR.puts("unrecognized output format #{arg}")
72
+ usage
73
+ end
74
+ options[:output_format] = arg.to_sym
75
+ when '--ll1' then (options[:ll1] ||= []) << arg.to_sym
62
76
  when '--mod-name' then options[:mod_name] = arg
63
77
  when '--output' then out = File.open(arg, "w")
78
+ when '--peg' then options[:peg] = true
64
79
  when '--prefix' then options[:prefix] = arg
80
+ when '--renumber' then options[:renumber] = true
65
81
  when '--namespace' then options[:namespace] = arg
66
- when '--progress' then options[:progress] = true
82
+ when '--progress' then options[:level] = 1 unless options[:level] == 0
83
+ when '--validate' then options[:validate] = true
67
84
  when '--help' then usage
68
85
  end
69
86
  end
70
87
 
71
- if options[:output_format] == :rb && !options[:ll1]
72
- STDERR.puts "outputing in .rb format requires -ll"
73
- exit(1)
74
- end
75
-
76
88
  input = File.open(ARGV[0]) if ARGV[0]
77
89
 
78
- ebnf = EBNF.parse(input || STDIN, options)
90
+ ebnf = EBNF.parse(input || STDIN, **options)
79
91
  ebnf.make_bnf if options[:bnf] || options[:ll1]
92
+ ebnf.make_peg if options[:peg]
80
93
  if options[:ll1]
81
94
  ebnf.first_follow(*options[:ll1])
82
95
  ebnf.build_tables
83
-
96
+ end
97
+
98
+ ebnf.renumber! if options[:renumber]
84
99
 
85
100
  res = case options[:output_format]
86
- when :ebnf then ebnf.to_s
87
- when :html then ebnf.to_html
88
- when :sxp then ebnf.to_sxp
89
- when :ttl then ebnf.to_ttl(options[:prefix], options[:namespace])
90
- when :rb then ebnf.to_ruby(out, options.merge(grammarFile: ARGV[0]))
91
- else ebnf.ast.inspect
101
+ when :abnf then ebnf.to_s(format: :abnf)
102
+ when :abnfh then ebnf.to_html(format: :abnf)
103
+ when :ebnf then ebnf.to_s
104
+ when :html then ebnf.to_html
105
+ when :isoebnf then ebnf.to_s(format: :isoebnf)
106
+ when :isoebnfh then ebnf.to_html(format: :isoebnf)
107
+ when :sxp then ebnf.to_sxp
108
+ when :ttl then ebnf.to_ttl(options[:prefix], options[:namespace])
109
+ when :rb then ebnf.to_ruby(out, grammarFile: ARGV[0], **options)
110
+ else ebnf.ast.inspect
92
111
  end
93
112
 
94
113
  out.puts res
@@ -0,0 +1,52 @@
1
+ # Core terminals available in uses of ABNF
2
+ ALPHA ::= [#x41-#x5A#x61-#x7A] # A-Z | a-z
3
+
4
+ BIT ::= '0' | '1'
5
+
6
+ CHAR ::= [#x01-#x7F]
7
+ # any 7-bit US-ASCII character,
8
+ # excluding NUL
9
+ CR ::= #x0D
10
+ # carriage return
11
+
12
+ CRLF ::= CR? LF
13
+ # Internet standard newline
14
+
15
+ CTL ::= [#x00-#x1F] | #x7F
16
+ # controls
17
+
18
+ DIGIT ::= [#x30-#x39]
19
+ # 0-9
20
+
21
+ DQUOTE ::= #x22
22
+ # " (Double Quote)
23
+
24
+ HEXDIG ::= DIGIT | [A-F] # [0-9A-F]
25
+
26
+ HTAB ::= #x09
27
+ # horizontal tab
28
+
29
+ LF ::= #x0A
30
+ # linefeed
31
+
32
+ LWSP ::= (WSP | CRLF WSP)*
33
+ # Use of this linear-white-space rule
34
+ # permits lines containing only white
35
+ # space that are no longer legal in
36
+ # mail headers and have caused
37
+ # interoperability problems in other
38
+ # contexts.
39
+ # Do not use when defining mail
40
+ # headers and use with caution in
41
+ # other contexts.
42
+
43
+ OCTET ::= [#x00-#xFF]
44
+ # 8 bits of data
45
+
46
+ SP ::= #x20
47
+
48
+ VCHAR ::= [#x21-#x7E]
49
+ # visible (printing) characters
50
+
51
+ WSP ::= SP | HTAB
52
+ # white space
@@ -0,0 +1,121 @@
1
+ rulelist = 1*( rule / (*c-wsp c-nl) )
2
+
3
+ rule = rulename defined-as elements c-nl
4
+ ; continues if next line starts
5
+ ; with white space
6
+
7
+ rulename = ALPHA *(ALPHA / DIGIT / "-")
8
+
9
+ defined-as = *c-wsp ("=" / "=/") *c-wsp
10
+ ; basic rules definition and
11
+ ; incremental alternatives
12
+
13
+ elements = alternation *c-wsp
14
+
15
+ c-wsp = WSP / (c-nl WSP)
16
+
17
+ c-nl = comment / CRLF
18
+ ; comment or newline
19
+
20
+ comment = ";" *(WSP / VCHAR) CRLF
21
+
22
+ alternation = concatenation
23
+ *(*c-wsp "/" *c-wsp concatenation)
24
+
25
+ concatenation = repetition *(1*c-wsp repetition)
26
+
27
+ repetition = [repeat] element
28
+
29
+ repeat = (*DIGIT "*" *DIGIT) / 1*DIGIT
30
+
31
+ element = rulename / group / option /
32
+ char-val / num-val / prose-val
33
+
34
+ group = "(" *c-wsp alternation *c-wsp ")"
35
+
36
+ option = "[" *c-wsp alternation *c-wsp "]"
37
+
38
+ char-val = case-insensitive-string /
39
+ case-sensitive-string
40
+
41
+ case-insensitive-string =
42
+ [ "%i" ] quoted-string
43
+
44
+ case-sensitive-string =
45
+ "%s" quoted-string
46
+
47
+ quoted-string = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
48
+ ; quoted string of SP and VCHAR
49
+ ; without DQUOTE
50
+
51
+ num-val = "%" (bin-val / dec-val / hex-val)
52
+
53
+ bin-val = "b" 1*BIT
54
+ [ 1*("." 1*BIT) / ("-" 1*BIT) ]
55
+ ; series of concatenated bit values
56
+ ; or single ONEOF range
57
+
58
+ dec-val = "d" 1*DIGIT
59
+ [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
60
+
61
+ hex-val = "x" 1*HEXDIG
62
+ [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
63
+
64
+ prose-val = "<" *(%x20-3D / %x3F-7E) ">"
65
+ ; bracketed string of SP and VCHAR
66
+ ; without angles
67
+ ; prose description, to be used as
68
+ ; last resort
69
+
70
+ ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
71
+
72
+ BIT = "0" / "1"
73
+
74
+ CHAR = %x01-7F
75
+ ; any 7-bit US-ASCII character,
76
+ ; excluding NUL
77
+ CR = %x0D
78
+ ; carriage return
79
+
80
+ CRLF = [CR] LF
81
+ ; Internet standard newline
82
+ ; Extended to allow only newline
83
+
84
+ CTL = %x00-1F / %x7F
85
+ ; controls
86
+
87
+ DIGIT = %x30-39
88
+ ; 0-9
89
+
90
+ DQUOTE = %x22
91
+ ; " (Double Quote)
92
+
93
+ HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
94
+
95
+ HTAB = %x09
96
+ ; horizontal tab
97
+
98
+ LF = %x0A
99
+ ; linefeed
100
+
101
+ LWSP = *(WSP / CRLF WSP)
102
+ ; Use of this linear-white-space rule
103
+ ; permits lines containing only white
104
+ ; space that are no longer legal in
105
+ ; mail headers and have caused
106
+ ; interoperability problems in other
107
+ ; contexts.
108
+ ; Do not use when defining mail
109
+ ; headers and use with caution in
110
+ ; other contexts.
111
+
112
+ OCTET = %x00-FF
113
+ ; 8 bits of data
114
+
115
+ SP = %x20
116
+
117
+ VCHAR = %x21-7E
118
+ ; visible (printing) characters
119
+
120
+ WSP = SP / HTAB
121
+ ; white space
@@ -0,0 +1,124 @@
1
+ rulelist ::= ( rule | (c_wsp* c_nl) )+
2
+
3
+ rule ::= rulename defined_as elements c_nl
4
+ # continues if next line starts
5
+ # with white space
6
+
7
+ elements ::= alternation c_wsp*
8
+
9
+ alternation ::= concatenation
10
+ (c_wsp* "/" c_wsp* concatenation)*
11
+
12
+ concatenation::= repetition (c_wsp+ repetition)*
13
+
14
+ repetition ::= repeat? element
15
+
16
+ repeat ::= (DIGIT* "*" DIGIT*) | DIGIT+
17
+
18
+ element ::= rulename | group | option |
19
+ char_val | num_val | prose_val
20
+
21
+ group ::= "(" c_wsp* alternation c_wsp* ")"
22
+
23
+ option ::= "[" c_wsp* alternation c_wsp* "]"
24
+
25
+ char_val ::= case_insensitive_string |
26
+ case_sensitive_string
27
+
28
+ case_insensitive_string ::=
29
+ "%i"? quoted_string
30
+
31
+ case_sensitive_string ::=
32
+ "%s" quoted_string
33
+
34
+ num_val ::= "%" (bin_val | dec_val | hex_val)
35
+
36
+ @terminals
37
+
38
+ # Terminals used in ABNF, itself
39
+ rulename ::= ALPHA (ALPHA | DIGIT | "-")*
40
+
41
+ defined_as ::= c_wsp* ("=" | "=/") c_wsp*
42
+ # basic rules definition and
43
+ # incremental alternatives
44
+
45
+ c_wsp ::= WSP | (c_nl WSP)
46
+
47
+ c_nl ::= comment | CRLF
48
+ # comment or newline
49
+
50
+ comment ::= ";" (WSP | VCHAR)* CRLF
51
+
52
+ quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE
53
+ # quoted string of SP and VCHAR
54
+ # without DQUOTE
55
+
56
+ bin_val ::= "b" BIT+
57
+ (("." BIT+)+ | ("-" BIT+))?
58
+ # series of concatenated bit values
59
+ # or single ONEOF range
60
+
61
+ dec_val ::= "d" DIGIT+
62
+ (("." DIGIT+)+ | ("-" DIGIT+))?
63
+
64
+ hex_val ::= "x" HEXDIG+
65
+ (("." HEXDIG+)+ | ("-" HEXDIG+))?
66
+
67
+ prose_val ::= "<" [#x20-#x3D#x3F-#x7E]* ">"
68
+ # bracketed string of SP and VCHAR
69
+ # without angles
70
+ # prose description, to be used as
71
+ # last resort
72
+
73
+ # Core terminals available in uses of ABNF
74
+ ALPHA ::= [#x41-#x5A#x61-#x7A] # A-Z | a-z
75
+
76
+ BIT ::= '0' | '1'
77
+
78
+ CHAR ::= [#x01-#x7F]
79
+ # any 7-bit US-ASCII character,
80
+ # excluding NUL
81
+ CR ::= #x0D
82
+ # carriage return
83
+
84
+ CRLF ::= CR? LF
85
+ # Internet standard newline
86
+
87
+ CTL ::= [#x00-#x1F] | #x7F
88
+ # controls
89
+
90
+ DIGIT ::= [#x30-#x39]
91
+ # 0-9
92
+
93
+ DQUOTE ::= #x22
94
+ # " (Double Quote)
95
+
96
+ HEXDIG ::= DIGIT | "A" | "B" | "C" | "D" | "E" | "F"
97
+
98
+ HTAB ::= #x09
99
+ # horizontal tab
100
+
101
+ LF ::= #x0A
102
+ # linefeed
103
+
104
+ LWSP ::= (WSP | CRLF WSP)*
105
+ # Use of this linear-white-space rule
106
+ # permits lines containing only white
107
+ # space that are no longer legal in
108
+ # mail headers and have caused
109
+ # interoperability problems in other
110
+ # contexts.
111
+ # Do not use when defining mail
112
+ # headers and use with caution in
113
+ # other contexts.
114
+
115
+ OCTET ::= [#x00-#xFF]
116
+ # 8 bits of data
117
+
118
+ SP ::= #x20
119
+
120
+ VCHAR ::= [#x21-#x7E]
121
+ # visible (printing) characters
122
+
123
+ WSP ::= SP | HTAB
124
+ # white space
@@ -0,0 +1,45 @@
1
+ (
2
+ (rule rulelist (plus (alt rule (seq (star c_wsp) c_nl))))
3
+ (rule rule (seq rulename defined_as elements c_nl))
4
+ (rule elements (seq alternation (star c_wsp)))
5
+ (rule alternation
6
+ (seq concatenation (star (seq (star c_wsp) "/" (star c_wsp) concatenation))))
7
+ (rule concatenation (seq repetition (star (seq (plus c_wsp) repetition))))
8
+ (rule repetition (seq (opt repeat) element))
9
+ (rule repeat (alt (seq (star DIGIT) "*" (star DIGIT)) (plus DIGIT)))
10
+ (rule element (alt rulename group option char_val num_val prose_val))
11
+ (rule group (seq "(" (star c_wsp) alternation (star c_wsp) ")"))
12
+ (rule option (seq "[" (star c_wsp) alternation (star c_wsp) "]"))
13
+ (rule char_val (alt case_insensitive_string case_sensitive_string))
14
+ (rule case_insensitive_string (seq (opt "%i") quoted_string))
15
+ (rule case_sensitive_string (seq "%s" quoted_string))
16
+ (rule num_val (seq "%" (alt bin_val dec_val hex_val)))
17
+ (terminals _terminals (seq))
18
+ (terminal rulename (seq ALPHA (star (alt ALPHA DIGIT "-"))))
19
+ (terminal defined_as (seq (star c_wsp) (alt "=" "=/") (star c_wsp)))
20
+ (terminal c_wsp (alt WSP (seq c_nl WSP)))
21
+ (terminal c_nl (alt comment CRLF))
22
+ (terminal comment (seq ";" (star (alt WSP VCHAR)) CRLF))
23
+ (terminal quoted_string (seq DQUOTE (star (range "#x20-#x21#x23-#x7E")) DQUOTE))
24
+ (terminal bin_val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT))))))
25
+ (terminal dec_val
26
+ (seq "d" (plus DIGIT) (opt (alt (plus (seq "." (plus DIGIT))) (seq "-" (plus DIGIT))))))
27
+ (terminal hex_val
28
+ (seq "x" (plus HEXDIG) (opt (alt (plus (seq "." (plus HEXDIG))) (seq "-" (plus HEXDIG))))))
29
+ (terminal prose_val (seq "<" (star (range "#x20-#x3D#x3F-#x7E")) ">"))
30
+ (terminal ALPHA (range "#x41-#x5A#x61-#x7A"))
31
+ (terminal BIT (alt "0" "1"))
32
+ (terminal CHAR (range "#x01-#x7F"))
33
+ (terminal CR (hex "#x0D"))
34
+ (terminal CRLF (seq (opt CR) LF))
35
+ (terminal CTL (alt (range "#x00-#x1F") (hex "#x7F")))
36
+ (terminal DIGIT (range "#x30-#x39"))
37
+ (terminal DQUOTE (hex "#x22"))
38
+ (terminal HEXDIG (alt DIGIT "A" "B" "C" "D" "E" "F"))
39
+ (terminal HTAB (hex "#x09"))
40
+ (terminal LF (hex "#x0A"))
41
+ (terminal LWSP (star (alt WSP (seq CRLF WSP))))
42
+ (terminal OCTET (range "#x00-#xFF"))
43
+ (terminal SP (hex "#x20"))
44
+ (terminal VCHAR (range "#x21-#x7E"))
45
+ (terminal WSP (alt SP HTAB)))