wikiscript-parser 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Manifest.txt +8 -0
- data/README.md +31 -0
- data/Rakefile +29 -0
- data/lib/wikiscript-parser.rb +25 -0
- data/lib/wikiscript-parser/parser.rb +205 -0
- data/lib/wikiscript-parser/version.rb +17 -0
- data/lib/wikiscript/parser.rb +5 -0
- metadata +106 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4cc732979e1e25604b1cdbe22b32257cb6a45f81
|
4
|
+
data.tar.gz: 6f0c638ffff2721893648fd14c806ccda48c129a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5be2b8eb71b3cd149ee292dd95309f959661092ebbf17a50633a195fa879178c4a087ae3af503bc6fe322e9d113d0f347f9e5e14272f30e296e973dd9494658b
|
7
|
+
data.tar.gz: b2401e1c920ae981e20b77b8399b8e03791cf38901440fc9e35a584c4bcf68d6c633b10315168d68e4a30806ccb4fa6c9b923d152274217dd6d586f992424a3f
|
data/CHANGELOG.md
ADDED
data/Manifest.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# wikiscript-parser - wikitext/script parser (builds abstract syntax tree 'n' more)
|
2
|
+
|
3
|
+
* home :: [github.com/wikiscript/wikiscript](https://github.com/wikiscript/wikiscript)
|
4
|
+
* bugs :: [github.com/wikiscript/wikiscript/issues](https://github.com/wikiscript/wikiscript/issues)
|
5
|
+
* gem :: [rubygems.org/gems/wikiscript-parser](https://rubygems.org/gems/wikiscript-parser)
|
6
|
+
* rdoc :: [rubydoc.info/gems/wikiscript-parser](http://rubydoc.info/gems/wikiscript-parser)
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
|
13
|
+
to be done
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
## Install
|
18
|
+
|
19
|
+
Use
|
20
|
+
|
21
|
+
gem install wikiscript-parser
|
22
|
+
|
23
|
+
or add to your Gemfile
|
24
|
+
|
25
|
+
gem 'wikiscript-parser'
|
26
|
+
|
27
|
+
|
28
|
+
## License
|
29
|
+
|
30
|
+
The `wikiscript` scripts are dedicated to the public domain.
|
31
|
+
Use it as you please with no restrictions whatsoever.
|
data/Rakefile
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'hoe'
|
2
|
+
require './lib/wikiscript-parser/version.rb'
|
3
|
+
|
4
|
+
# Gem specification for wikiscript-parser, declared through the Hoe DSL.
Hoe.spec 'wikiscript-parser' do
  # version comes from the constant defined in lib/wikiscript-parser/version.rb
  self.version = Wikiscript::Module::Parser::VERSION

  # the one-line summary doubles as the long description
  self.summary     = "wikiscript-parser - wikitext/script parser (builds abstract syntax tree 'n' more)"
  self.description = summary

  self.urls = { home: 'https://github.com/wikiscript/wikiscript' }

  self.author = 'Gerald Bauer'
  self.email  = 'opensport@googlegroups.com'

  # use the markdown files (.md) so they get formatted on github
  self.readme_file  = 'README.md'
  self.history_file = 'CHANGELOG.md'

  # runtime dependencies - one [name] entry per gem
  self.extra_deps = [['wikitree']]

  self.licenses = ['Public Domain']

  self.spec_extras = { required_ruby_version: '>= 2.2.2' }
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
## stdlibs
|
2
|
+
require 'strscan'
|
3
|
+
|
4
|
+
|
5
|
+
## 3rd party gems/libs
|
6
|
+
require 'wikitree'
|
7
|
+
|
8
|
+
# our own code
|
9
|
+
require 'wikiscript-parser/version' # let it always go first
|
10
|
+
require 'wikiscript-parser/parser'
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
####
|
16
|
+
# convenience all-in-one parse helper - add - why? why not?
|
17
|
+
module Wikiscript
  class << self
    # Convenience all-in-one helper: builds a Parser for +text+ and
    # returns whatever Parser#parse returns.
    def parse( text )
      parser = Parser.new( text )
      parser.parse
    end
  end
end
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
# Print the version banner (gem version, Ruby runtime, install root) when this file is loaded.
puts Wikiscript::Module::Parser.banner
|
@@ -0,0 +1,205 @@
|
|
1
|
+
module Wikiscript


##
# Small recursive-descent parser for a subset of wikitext.
#
# The input text is scanned with a StringScanner and turned into an array of
# ast nodes:
#
#   {{name|param|key=value}}   =>  Wikitree::Template.new( name, params )
#   [[page]] / [[page|alt]]    =>  Wikitree::Page.new( name, alt_name )
#   raw text run               =>  Wikitree::Text.new( run )
#
# Note: progress/debug messages are printed to stdout while parsing and
# syntax errors are reported with a message followed by exit 1.
class Parser
  ####
  # convenience all-in-one parse helper
  def self.parse( text )
    new( text ).parse
  end


  # text - the wikitext source (string) to parse
  def initialize( text )
    @text = text
  end

  # Parses the text passed to the constructor.
  # Returns an array of ast nodes (see parse_lines).
  def parse
    parse_lines( @text )
  end



  # Advances the scanner past spaces, tabs and (multiple) newlines.
  # Returns 0 at the end of input, otherwise the skipped string
  # (empty if there was nothing to skip).
  def skip_whitespaces( input )  ## incl. multiple newlines
    return 0  if input.eos?

    input.scan( /[ \t\r\n]*/ )
  end

  #
  #  Whereas MediaWiki variable names are all uppercase,
  #  template names have the same basic features and limitations as all page names:
  #  they are case-sensitive (except for the first character);
  #  underscores are parsed as spaces;
  #  and they cannot contain any of these characters: # < > [ ] | { }.
  #  This is because those are reserved for wiki markup and HTML.

  TEMPLATE_BEGIN_RE = /\{\{/    ## e.g {{
  TEMPLATE_END_RE   = /\}\}/    ## e.g. }}

  ## todo/fix: check how to add # too!!!
  ## todo: check what chars to escape in character class
  ##  change to something like [^|<>\[\]{}]+ ]
  TEMPLATE_NAME_RE = /[a-z0-9 _-]+/i

  ## html comment e.g. <!-- Area rank should match .. -->
  ##  note: use .*? (non-greedy match) - the * (not +) also matches the
  ##  empty comment <!---->; with + the match would run on to the
  ##  next --> and swallow all content in-between
  COMMENT_RE = /<!--.*?-->/m



  # Parses a template starting at {{ and returns a Wikitree::Template
  # built from the template name and an array of [param_name, param_value]
  # pairs (see parse_param).
  #
  # Anything other than | or }} after the name / a param is reported
  # as a syntax error (exit 1).
  def parse_template( input )
    input.scan( TEMPLATE_BEGIN_RE ) ## e.g.{{
    skip_whitespaces( input )

    ## note: scan returns nil if there is no (supported) name;
    ##   use to_s to continue with an empty name (instead of a nil error)
    ##   e.g. {{#if: ..}} ends up in the syntax error report below
    name = input.scan( TEMPLATE_NAME_RE ).to_s
    name = name.strip  ## strip trailing spaces?
    puts "==> (begin) template >#{name}<"
    skip_whitespaces( input )

    params = []
    loop do
      if input.check( TEMPLATE_END_RE ) ## e.g. }}
        input.scan( TEMPLATE_END_RE )
        puts "<== (end) template >#{name}<"
        ## puts " params:"
        ## pp params
        return Wikitree::Template.new( name, params )
      elsif input.check( /\|/ )  ## e.g. |
        puts " param #{params.size+1} (#{name}):"
        param_name, param_value = parse_param( input )
        params << [param_name, param_value]
      else
        puts "!! SYNTAX ERROR: expected closing }} or para | in template:"
        puts input.peek( 100 )
        exit 1
      end
    end
  end



  # Parses a single template param starting at |.
  #
  # Returns [name, value] where name is the key of a named param
  # (e.g. hello in |hello=world) or nil for an unnamed/positioned param,
  # and value is an array of ast nodes (empty if the param has no value).
  def parse_param( input )
    input.scan( /\|/ )
    skip_whitespaces( input )

    name  = nil
    value = []    # note: value is an array of ast nodes!!!

    ## check for named param e.g. hello=
    ##   otherwise assume content
    if input.check( /[a-z0-9 _-]+(?==)/i )  ## note: use positive lookahead (=)
      name = input.scan( /[a-z0-9 _-]+/i )
      name = name.strip  ## strip trailing spaces?
      puts " param name >#{name}<"
      input.scan( /=/ )
      skip_whitespaces( input )

      if input.check( /\|/ ) ||
         input.check( /\}/ )    ## add/allow }} too? - why? why not?
        ## allow empty value!!!
        puts "!! WARN: empty value for param >#{name}<"
      else
        value = parse_param_value( input )  ## get keyed param value
        puts " param value >#{value}<"
      end
    else
      if input.check( /\|/ ) ||    ## add/allow }} too? - why? why not?
         input.check( /\}/ )
        ## allow empty value here too - why? why not?
        puts "!! WARN: empty value for (unnamed/positioned) param"
      else
        value = parse_param_value( input )  ## get (unnamed) param value
        puts " param value >#{value}<"
      end
    end
    [name, value]
  end


  # Collects the ast nodes of a param value up to (but not including)
  # the next | or }}.
  #
  # Returns an array with at least one node; running into the end of input
  # before | or }} is reported as a syntax error (exit 1).
  def parse_param_value( input )   ## todo: change to parse_param_value_nodes or such - why? why not??
    # puts " [debug] parse_param_value >#{input.peek(10)}...<"

    values = []   ## todo - change/rename to nodes??
    loop do
      values << parse_node( input )
      skip_whitespaces( input )

      ## puts " [debug] peek >#{input.peek(10)}...<"
      if input.check( /\|/ ) || input.check( /\}\}/ )
        ## puts " [debug] break param_value"
        break
      end

      if input.eos?
        puts "!! SYNTAX ERROR: unexpected end of string in param value; expected ending w/ | or }}"
        exit 1
      end
    end

    values
  end


  # Parses a page link starting at [[ e.g. [[Vienna]] or [[Vienna|Wien]]
  # and returns a Wikitree::Page built from the page name and the optional
  # alternate/display name (nil if there is no | part).
  # Whitespace following the closing ]] gets skipped.
  def parse_link( input )   ## todo/fix: change to parse_page - why? why not?
    input.scan( /\[\[/ )

    ## page name
    ##   note: scan returns nil if nothing matches e.g. [[]] or [[a|]];
    ##     use to_s to get an empty name (instead of a nil error)
    name     = input.scan( /[^|\]]+/ ).to_s.strip
    alt_name = if input.check( /\|/ )   ## optional alternate/display name
                 input.scan( /\|/ )     ## eat up |
                 input.scan( /[^\]]+/ ).to_s.strip
               else
                 nil
               end

    input.scan( /\]\]/ )    ## eatup ]]
    skip_whitespaces( input )

    if alt_name
      puts " @page<#{name} | #{alt_name}>"
    else
      puts " @page<#{name}>"
    end

    Wikitree::Page.new( name, alt_name )
  end


  # Parses the next node at the scanner position:
  # a template ({{), a page link ([[) or a raw text run
  # (everything up to the next | { } [ or ]; stripped).
  # Anything else is reported as a syntax error (exit 1).
  def parse_node( input )
    ## puts " [debug] parse >#{input.peek(10)}...<"
    if input.check( TEMPLATE_BEGIN_RE )
      parse_template( input )
    elsif input.check( /\[\[/ )
      parse_link( input )
    elsif input.check( /[^|{}\[\]]+/ )    ## check for rawtext run for now
      run = input.scan( /[^|{}\[\]]+/ ).strip
      # puts " text run=>#{run}<"
      Wikitree::Text.new( run )
    else
      puts " !! SYNTAX ERROR: unknown content type:"
      puts input.peek( 100 )
      exit 1
    end
  end


  # Parses the complete text and returns an array of (top-level) ast nodes.
  # Html comments get removed up-front; whitespace between nodes gets skipped.
  def parse_lines( text )
    ## note: remove all html comments for now - why? why not?
    ##  <!-- Area rank should match .. -->
    text = text.gsub( COMMENT_RE ) do |m|
      puts " removing comment >#{m}<"
      ''
    end

    input = StringScanner.new( text )

    nodes = []
    loop do
      skip_whitespaces( input )
      break if input.eos?

      nodes << parse_node( input )
    end
    nodes
  end
end # class Parser


end # module Wikiscript
|
@@ -0,0 +1,17 @@
|
|
1
|
+
|
2
|
+
module Wikiscript
  module Module
    module Parser
      # version of the wikiscript-parser gem
      VERSION = '0.0.1'

      class << self
        # One-line description: gem version, ruby runtime
        # (version, release date, platform) and the root directory.
        def banner
          runtime = "Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
          "wikiscript-parser/#{VERSION} on #{runtime} in (#{root})"
        end

        # Absolute path of the directory three File.dirname steps
        # up from this file.
        def root
          three_up = 3.times.inject( __FILE__ ) { |path, _| File.dirname( path ) }
          File.expand_path( three_up )
        end
      end
    end # module Parser
  end # module Module
end # module Wikiscript
|
metadata
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wikiscript-parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Gerald Bauer
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-11-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: wikitree
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rdoc
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '4.0'
|
34
|
+
- - "<"
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '7'
|
37
|
+
type: :development
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '4.0'
|
44
|
+
- - "<"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '7'
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: hoe
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - "~>"
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '3.22'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - "~>"
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '3.22'
|
61
|
+
description: wikiscript-parser - wikitext/script parser (builds abstract syntax tree
|
62
|
+
'n' more)
|
63
|
+
email: opensport@googlegroups.com
|
64
|
+
executables: []
|
65
|
+
extensions: []
|
66
|
+
extra_rdoc_files:
|
67
|
+
- CHANGELOG.md
|
68
|
+
- Manifest.txt
|
69
|
+
- README.md
|
70
|
+
files:
|
71
|
+
- CHANGELOG.md
|
72
|
+
- Manifest.txt
|
73
|
+
- README.md
|
74
|
+
- Rakefile
|
75
|
+
- lib/wikiscript-parser.rb
|
76
|
+
- lib/wikiscript-parser/parser.rb
|
77
|
+
- lib/wikiscript-parser/version.rb
|
78
|
+
- lib/wikiscript/parser.rb
|
79
|
+
homepage: https://github.com/wikiscript/wikiscript
|
80
|
+
licenses:
|
81
|
+
- Public Domain
|
82
|
+
metadata: {}
|
83
|
+
post_install_message:
|
84
|
+
rdoc_options:
|
85
|
+
- "--main"
|
86
|
+
- README.md
|
87
|
+
require_paths:
|
88
|
+
- lib
|
89
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 2.2.2
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
requirements: []
|
100
|
+
rubyforge_project:
|
101
|
+
rubygems_version: 2.5.2
|
102
|
+
signing_key:
|
103
|
+
specification_version: 4
|
104
|
+
summary: wikiscript-parser - wikitext/script parser (builds abstract syntax tree 'n'
|
105
|
+
more)
|
106
|
+
test_files: []
|