wikiscript-parser 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Manifest.txt +8 -0
- data/README.md +31 -0
- data/Rakefile +29 -0
- data/lib/wikiscript-parser.rb +25 -0
- data/lib/wikiscript-parser/parser.rb +205 -0
- data/lib/wikiscript-parser/version.rb +17 -0
- data/lib/wikiscript/parser.rb +5 -0
- metadata +106 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4cc732979e1e25604b1cdbe22b32257cb6a45f81
|
4
|
+
data.tar.gz: 6f0c638ffff2721893648fd14c806ccda48c129a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5be2b8eb71b3cd149ee292dd95309f959661092ebbf17a50633a195fa879178c4a087ae3af503bc6fe322e9d113d0f347f9e5e14272f30e296e973dd9494658b
|
7
|
+
data.tar.gz: b2401e1c920ae981e20b77b8399b8e03791cf38901440fc9e35a584c4bcf68d6c633b10315168d68e4a30806ccb4fa6c9b923d152274217dd6d586f992424a3f
|
data/CHANGELOG.md
ADDED
data/Manifest.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# wikiscript-parser - wikitext/script parser (builds abstract syntax tree 'n' more)
|
2
|
+
|
3
|
+
* home :: [github.com/wikiscript/wikiscript](https://github.com/wikiscript/wikiscript)
|
4
|
+
* bugs :: [github.com/wikiscript/wikiscript/issues](https://github.com/wikiscript/wikiscript/issues)
|
5
|
+
* gem :: [rubygems.org/gems/wikiscript-parser](https://rubygems.org/gems/wikiscript-parser)
|
6
|
+
* rdoc :: [rubydoc.info/gems/wikiscript-parser](http://rubydoc.info/gems/wikiscript-parser)
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
|
13
|
+
to be done
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
## Install
|
18
|
+
|
19
|
+
Use
|
20
|
+
|
21
|
+
gem install wikiscript-parser
|
22
|
+
|
23
|
+
or add to your Gemfile
|
24
|
+
|
25
|
+
gem 'wikiscript-parser'
|
26
|
+
|
27
|
+
|
28
|
+
## License
|
29
|
+
|
30
|
+
The `wikiscript` scripts are dedicated to the public domain.
|
31
|
+
Use it as you please with no restrictions whatsoever.
|
data/Rakefile
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'hoe'
|
2
|
+
require './lib/wikiscript-parser/version.rb'
|
3
|
+
|
4
|
+
Hoe.spec 'wikiscript-parser' do
|
5
|
+
|
6
|
+
self.version = Wikiscript::Module::Parser::VERSION
|
7
|
+
|
8
|
+
self.summary = "wikiscript-parser - wikitext/script parser (builds abstract syntax tree 'n' more)"
|
9
|
+
self.description = summary
|
10
|
+
|
11
|
+
self.urls = { home: 'https://github.com/wikiscript/wikiscript' }
|
12
|
+
|
13
|
+
self.author = 'Gerald Bauer'
|
14
|
+
self.email = 'opensport@googlegroups.com'
|
15
|
+
|
16
|
+
# use the .md (markdown) extension for github formatting
|
17
|
+
self.readme_file = 'README.md'
|
18
|
+
self.history_file = 'CHANGELOG.md'
|
19
|
+
|
20
|
+
self.extra_deps = [
|
21
|
+
['wikitree' ],
|
22
|
+
]
|
23
|
+
|
24
|
+
self.licenses = ['Public Domain']
|
25
|
+
|
26
|
+
self.spec_extras = {
|
27
|
+
required_ruby_version: '>= 2.2.2'
|
28
|
+
}
|
29
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
## stdlibs
|
2
|
+
require 'strscan'
|
3
|
+
|
4
|
+
|
5
|
+
## 3rd party gems/libs
|
6
|
+
require 'wikitree'
|
7
|
+
|
8
|
+
# our own code
|
9
|
+
require 'wikiscript-parser/version' # let it always go first
|
10
|
+
require 'wikiscript-parser/parser'
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
####
|
16
|
+
# convenience all-in-one parse helper - add - why? why not?
|
17
|
+
module Wikiscript
  class << self
    # Convenience all-in-one helper: builds a Parser for +text+ and
    # returns whatever Parser#parse returns.
    def parse( text )
      parser = Parser.new( text )
      parser.parse
    end
  end
end
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
puts Wikiscript::Module::Parser.banner
|
@@ -0,0 +1,205 @@
|
|
1
|
+
module Wikiscript

  # Recursive-descent parser for a small subset of wikitext.
  #
  # It recognises templates ({{name|param|key=value}}), page links
  # ([[Page]] / [[Page|display]]) and raw text runs, and returns an array of
  # Wikitree nodes (Wikitree::Template, Wikitree::Page, Wikitree::Text).
  #
  # Notes on behaviour:
  # - progress / debug information is printed to stdout while parsing
  # - on a syntax error a message is printed and the process exits with
  #   status 1 (that is, SystemExit is raised)
  class Parser
    ####
    # convenience all-in-one parse helper
    def self.parse( text )
      new( text ).parse
    end

    # text - the wikitext source (String) to parse
    def initialize( text )
      @text = text
    end

    # Parses the text passed to the constructor; returns an array of nodes.
    def parse
      parse_lines( @text )
    end

    # Advances +input+ (a StringScanner) past spaces, tabs and newlines
    # (incl. multiple newlines).
    #
    # Returns 0 when already at the end of the string, otherwise the
    # (possibly empty) string of skipped whitespace.
    def skip_whitespaces( input )
      return 0 if input.eos?

      input.scan( /[ \t\r\n]*/ )
    end

    #
    # Whereas MediaWiki variable names are all uppercase,
    # template names have the same basic features and limitations as all page names:
    # they are case-sensitive (except for the first character);
    # underscores are parsed as spaces;
    # and they cannot contain any of these characters: # < > [ ] | { }.
    # This is because those are reserved for wiki markup and HTML.

    TEMPLATE_BEGIN_RE = /\{\{/   ## e.g. {{
    TEMPLATE_END_RE   = /\}\}/   ## e.g. }}

    ## todo/fix: check how to add # too!!!
    ## todo: check what chars to escape in character class
    ##   change to something like [^|<>\[\]{}]+
    TEMPLATE_NAME_RE = /[a-z0-9 _-]+/i

    PARAM_NAME_RE  = /[a-z0-9 _-]+/i
    NAMED_PARAM_RE = /[a-z0-9 _-]+(?==)/i   ## note: positive lookahead for (=)

    # Parses one template starting at {{ and returns a Wikitree::Template
    # holding the template name and an array of [param_name, param_value]
    # pairs (param_name is nil for unnamed/positional params).
    #
    # Exits with status 1 if no template name follows {{ or if neither | nor
    # }} follows the name or a param.
    def parse_template( input )
      input.scan( TEMPLATE_BEGIN_RE )    ## e.g. {{
      skip_whitespaces( input )

      name = input.scan( TEMPLATE_NAME_RE )
      ## note: scan returns nil if no name char follows (e.g. {{#if or {{}});
      ##   report a syntax error instead of crashing on nil.strip
      syntax_error( "!! SYNTAX ERROR: expected template name after {{:", input ) if name.nil?
      name = name.strip    ## strip trailing spaces
      puts "==> (begin) template >#{name}<"
      skip_whitespaces( input )

      params = []
      loop do
        if input.scan( TEMPLATE_END_RE )    ## e.g. }}
          puts "<== (end) template >#{name}<"
          return Wikitree::Template.new( name, params )
        elsif input.check( /\|/ )           ## e.g. |
          puts " param #{params.size+1} (#{name}):"
          param_name, param_value = parse_param( input )
          params << [param_name, param_value]
        else
          syntax_error( "!! SYNTAX ERROR: expected closing }} or para | in template:", input )
        end
      end
    end

    # Parses one template param starting at | and returns [name, value].
    #
    # name  - String for a named param (e.g. hello=), otherwise nil
    # value - array of ast nodes; empty when the param has no value
    def parse_param( input )
      input.scan( /\|/ )
      skip_whitespaces( input )

      ## check for named param e.g. hello=
      ##   otherwise assume content
      name = nil
      if input.check( NAMED_PARAM_RE )
        name = input.scan( PARAM_NAME_RE ).strip    ## strip trailing spaces
        puts " param name >#{name}<"
        input.scan( /=/ )
        skip_whitespaces( input )
      end

      value = []    # note: value is an array of ast nodes!!!
      if input.check( /[|}]/ )
        ## allow empty value (next param or end of template follows)
        if name
          puts "!! WARN: empty value for param >#{name}<"
        else
          puts "!! WARN: empty value for (unnamed/positioned) param"
        end
      else
        value = parse_param_value( input )
        puts " param value >#{value}<"
      end

      [name, value]
    end

    # Collects the nodes of a param value up to (but not including) the next
    # | or }} and returns them as an array.
    #
    # Exits with status 1 if the input ends before | or }} is seen.
    def parse_param_value( input )
      values = []
      loop do
        values << parse_node( input )
        skip_whitespaces( input )

        break if input.check( /\||\}\}/ )

        if input.eos?
          syntax_error( "!! SYNTAX ERROR: unexpected end of string in param value; expected ending w/ | or }}" )
        end
      end

      values
    end

    # Parses one page link starting at [[ and returns a Wikitree::Page with
    # the page name and the optional alternate/display name (nil if the link
    # has no | part; the empty string for [[Page|]]).
    #
    # Exits with status 1 if no page name follows [[. A missing closing ]]
    # only prints a warning.
    def parse_link( input )
      input.scan( /\[\[/ )

      ## page name
      name = input.scan( /[^|\]]+/ )
      ## note: scan returns nil for [[]] or [[|x]];
      ##   report a syntax error instead of crashing on nil.strip
      syntax_error( "!! SYNTAX ERROR: expected page name after [[:", input ) if name.nil?
      name = name.strip

      ## optional alternate/display name
      ##   note: use * (not +) so that an empty display name (e.g. [[Page|]])
      ##         results in an empty string and not in nil.strip
      alt_name = nil
      alt_name = input.scan( /[^\]]*/ ).strip if input.scan( /\|/ )

      ## eat up ]]
      puts "!! WARN: missing closing ]] for page >#{name}<" unless input.scan( /\]\]/ )
      skip_whitespaces( input )

      if alt_name
        puts " @page<#{name} | #{alt_name}>"
      else
        puts " @page<#{name}>"
      end

      Wikitree::Page.new( name, alt_name )
    end

    # Parses the next node at the scan position: a template, a page link or
    # a raw text run (stripped and wrapped in Wikitree::Text).
    #
    # Exits with status 1 if the input starts with anything else
    # (e.g. a single {, }, [, ] or |, or the end of the string).
    def parse_node( input )
      if input.check( TEMPLATE_BEGIN_RE )
        parse_template( input )
      elsif input.check( /\[\[/ )
        parse_link( input )
      elsif (run = input.scan( /[^|{}\[\]]+/ ))    ## rawtext run for now
        Wikitree::Text.new( run.strip )
      else
        syntax_error( " !! SYNTAX ERROR: unknown content type:", input )
      end
    end

    # Removes all html comments from +text+, then parses node after node
    # until the end of the string; returns the array of nodes.
    def parse_lines( text )
      ## note: remove all html comments for now - why? why not?
      ##   e.g. <!-- Area rank should match .. -->
      ## note: use .*? (non-greedy match); * (not +) also removes
      ##       the empty comment <!---->
      text = text.gsub( /<!--.*?-->/m ) do |comment|
        puts " removing comment >#{comment}<"
        ''
      end

      input = StringScanner.new( text )

      nodes = []
      loop do
        skip_whitespaces( input )
        break if input.eos?

        nodes << parse_node( input )
      end
      nodes
    end

    private

    # Prints +message+ (plus up to 100 chars of upcoming input if +input+ is
    # given) and exits the process with status 1.
    def syntax_error( message, input = nil )
      puts message
      puts input.peek( 100 ) if input
      exit 1
    end
  end # class Parser

end # module Wikiscript
|
@@ -0,0 +1,17 @@
|
|
1
|
+
|
2
|
+
module Wikiscript
  module Module
    module Parser
      VERSION = '0.0.1'

      class << self
        # One-line banner with the gem version, the running Ruby
        # (version, release date, platform) and the gem's root directory.
        def banner
          ruby_info = "Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
          "wikiscript-parser/#{VERSION} on #{ruby_info} in (#{root})"
        end

        # Absolute path of the directory three levels up from this file.
        def root
          base_dir = 3.times.inject( __FILE__ ) { |path, _| File.dirname( path ) }
          File.expand_path( base_dir )
        end
      end
    end # module Parser
  end # module Module
end # module Wikiscript
|
metadata
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wikiscript-parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Gerald Bauer
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-11-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: wikitree
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rdoc
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '4.0'
|
34
|
+
- - "<"
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '7'
|
37
|
+
type: :development
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '4.0'
|
44
|
+
- - "<"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '7'
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: hoe
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - "~>"
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '3.22'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - "~>"
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '3.22'
|
61
|
+
description: wikiscript-parser - wikitext/script parser (builds abstract syntax tree
|
62
|
+
'n' more)
|
63
|
+
email: opensport@googlegroups.com
|
64
|
+
executables: []
|
65
|
+
extensions: []
|
66
|
+
extra_rdoc_files:
|
67
|
+
- CHANGELOG.md
|
68
|
+
- Manifest.txt
|
69
|
+
- README.md
|
70
|
+
files:
|
71
|
+
- CHANGELOG.md
|
72
|
+
- Manifest.txt
|
73
|
+
- README.md
|
74
|
+
- Rakefile
|
75
|
+
- lib/wikiscript-parser.rb
|
76
|
+
- lib/wikiscript-parser/parser.rb
|
77
|
+
- lib/wikiscript-parser/version.rb
|
78
|
+
- lib/wikiscript/parser.rb
|
79
|
+
homepage: https://github.com/wikiscript/wikiscript
|
80
|
+
licenses:
|
81
|
+
- Public Domain
|
82
|
+
metadata: {}
|
83
|
+
post_install_message:
|
84
|
+
rdoc_options:
|
85
|
+
- "--main"
|
86
|
+
- README.md
|
87
|
+
require_paths:
|
88
|
+
- lib
|
89
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 2.2.2
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
requirements: []
|
100
|
+
rubyforge_project:
|
101
|
+
rubygems_version: 2.5.2
|
102
|
+
signing_key:
|
103
|
+
specification_version: 4
|
104
|
+
summary: wikiscript-parser - wikitext/script parser (builds abstract syntax tree 'n'
|
105
|
+
more)
|
106
|
+
test_files: []
|