wikiscript-parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4cc732979e1e25604b1cdbe22b32257cb6a45f81
4
+ data.tar.gz: 6f0c638ffff2721893648fd14c806ccda48c129a
5
+ SHA512:
6
+ metadata.gz: 5be2b8eb71b3cd149ee292dd95309f959661092ebbf17a50633a195fa879178c4a087ae3af503bc6fe322e9d113d0f347f9e5e14272f30e296e973dd9494658b
7
+ data.tar.gz: b2401e1c920ae981e20b77b8399b8e03791cf38901440fc9e35a584c4bcf68d6c633b10315168d68e4a30806ccb4fa6c9b923d152274217dd6d586f992424a3f
@@ -0,0 +1,3 @@
1
+ ### 0.0.1 / 2020-11-30
2
+
3
+ * Everything is new. First release.
@@ -0,0 +1,8 @@
1
+ CHANGELOG.md
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ lib/wikiscript-parser.rb
6
+ lib/wikiscript-parser/parser.rb
7
+ lib/wikiscript-parser/version.rb
8
+ lib/wikiscript/parser.rb
@@ -0,0 +1,31 @@
1
+ # wikiscript-parser - wikitext/script parser (builds abstract syntax tree 'n' more)
2
+
3
+ * home :: [github.com/wikiscript/wikiscript](https://github.com/wikiscript/wikiscript)
4
+ * bugs :: [github.com/wikiscript/wikiscript/issues](https://github.com/wikiscript/wikiscript/issues)
5
+ * gem :: [rubygems.org/gems/wikiscript-parser](https://rubygems.org/gems/wikiscript-parser)
6
+ * rdoc :: [rubydoc.info/gems/wikiscript-parser](http://rubydoc.info/gems/wikiscript-parser)
7
+
8
+
9
+
10
+ ## Usage
11
+
12
+
13
+ to be done
14
+
15
+
16
+
17
+ ## Install
18
+
19
+ Use
20
+
21
+ gem install wikiscript-parser
22
+
23
+ or add to your Gemfile
24
+
25
+ gem 'wikiscript-parser'
26
+
27
+
28
+ ## License
29
+
30
+ The `wikiscript` scripts are dedicated to the public domain.
31
+ Use them as you please with no restrictions whatsoever.
@@ -0,0 +1,29 @@
1
+ require 'hoe'
2
+ require './lib/wikiscript-parser/version.rb'
3
+
4
+ Hoe.spec 'wikiscript-parser' do
5
+
6
+ self.version = Wikiscript::Module::Parser::VERSION
7
+
8
+ self.summary = "wikiscript-parser - wikitext/script parser (builds abstract syntax tree 'n' more)"
9
+ self.description = summary
10
+
11
+ self.urls = { home: 'https://github.com/wikiscript/wikiscript' }
12
+
13
+ self.author = 'Gerald Bauer'
14
+ self.email = 'opensport@googlegroups.com'
15
+
16
+ # switch extension to .markdown for github formatting
17
+ self.readme_file = 'README.md'
18
+ self.history_file = 'CHANGELOG.md'
19
+
20
+ self.extra_deps = [
21
+ ['wikitree' ],
22
+ ]
23
+
24
+ self.licenses = ['Public Domain']
25
+
26
+ self.spec_extras = {
27
+ required_ruby_version: '>= 2.2.2'
28
+ }
29
+ end
@@ -0,0 +1,25 @@
1
+ ## stdlibs
2
+ require 'strscan'
3
+
4
+
5
+ ## 3rd party gems/libs
6
+ require 'wikitree'
7
+
8
+ # our own code
9
+ require 'wikiscript-parser/version' # let it always go first
10
+ require 'wikiscript-parser/parser'
11
+
12
+
13
+
14
+
15
+ ####
16
+ # convenience all-in-one parse helper - add - why? why not?
17
module Wikiscript
  # Module-level shortcut: builds a Parser for +text+ and returns
  # whatever that parser's #parse returns.
  def self.parse(text)
    parser = Parser.new(text)
    parser.parse
  end
end
22
+
23
+
24
+
25
+ puts Wikiscript::Module::Parser.banner
@@ -0,0 +1,205 @@
1
module Wikiscript

  # Recursive-descent parser for a subset of wikitext.
  #
  # The input is scanned with a StringScanner and turned into an array of
  # nodes built via Wikitree::Template, Wikitree::Page and Wikitree::Text.
  #
  # Progress/debug lines are printed to stdout while parsing. Syntax errors
  # are printed to stdout and terminate the process with exit status 1.
  class Parser
    #
    # Whereas MediaWiki variable names are all uppercase,
    # template names have the same basic features and limitations as all page names:
    # they are case-sensitive (except for the first character);
    # underscores are parsed as spaces;
    # and they cannot contain any of these characters: # < > [ ] | { }.
    # This is because those are reserved for wiki markup and HTML.

    TEMPLATE_BEGIN_RE = /\{\{/    ## e.g. {{
    TEMPLATE_END_RE   = /\}\}/    ## e.g. }}

    ## todo/fix: check how to add # too!!!
    ## todo: check what chars to escape in character class
    ##   change to something like [^|<>\[\]{}]+
    TEMPLATE_NAME_RE  = /[a-z0-9 _-]+/i

    ####
    # convenience all-in-one parse helper
    #
    # text - the wikitext source (String)
    #
    # Returns the array of nodes produced by #parse.
    def self.parse( text )
      new( text ).parse
    end

    # text - the wikitext source (String) to parse later with #parse
    def initialize( text )
      @text = text
    end

    # Parses the text passed to the constructor; returns an array of nodes.
    def parse
      parse_lines( @text )
    end

    # Advances the scanner past any run of spaces, tabs and newlines.
    #
    # Returns 0 when already at the end of input, otherwise the (possibly
    # empty) string of whitespace that was consumed. Callers in this class
    # ignore the return value.
    def skip_whitespaces( input )   ## incl. multiple newlines
      return 0 if input.eos?

      input.scan( /[ \t\r\n]*/ )
    end

    # Parses one template starting at {{ and ending at the matching }}.
    #
    # Returns Wikitree::Template.new( name, params ) where params is an
    # array of [param_name_or_nil, value_nodes] pairs.
    # Exits with a syntax error if the template name is missing or the
    # template is neither continued with | nor closed with }}.
    def parse_template( input )
      input.scan( TEMPLATE_BEGIN_RE )   ## e.g. {{
      skip_whitespaces( input )

      name = input.scan( TEMPLATE_NAME_RE )
      ## note: scan returns nil for e.g. {{}} or {{|x}} - report it as a
      ##   syntax error instead of crashing on nil.strip
      if name.nil?
        syntax_error( "!! SYNTAX ERROR: expected template name after {{:", input )
      end
      name = name.strip   ## strip trailing spaces
      puts "==> (begin) template >#{name}<"
      skip_whitespaces( input )

      params = []
      loop do
        if input.scan( TEMPLATE_END_RE )   ## e.g. }}
          puts "<== (end) template >#{name}<"
          return Wikitree::Template.new( name, params )
        elsif input.check( /\|/ )          ## e.g. |
          puts " param #{params.size+1} (#{name}):"
          param_name, param_value = parse_param( input )
          params << [param_name, param_value]
        else
          syntax_error( "!! SYNTAX ERROR: expected closing }} or para | in template:", input )
        end
      end
    end

    # Parses one template parameter starting at its leading |.
    #
    # Returns [name, value] where name is the stripped key of a named
    # param (e.g. hello=) or nil for a positional param, and value is an
    # array of nodes (empty when the param has no value).
    def parse_param( input )
      input.scan( /\|/ )
      skip_whitespaces( input )

      name  = nil
      value = []   # note: value is an array of ast nodes!!!

      ## check for named param e.g. hello=
      ##   otherwise assume content
      if input.check( /[a-z0-9 _-]+(?==)/i )   ## note: use positive lookahead (=)
        name = input.scan( /[a-z0-9 _-]+/i ).strip
        puts " param name >#{name}<"
        input.scan( /=/ )
        skip_whitespaces( input )
      end

      ## allow empty value (next char is | or }) for named and positional params
      if input.check( /\|/ ) || input.check( /\}/ )
        if name
          puts "!! WARN: empty value for param >#{name}<"
        else
          puts "!! WARN: empty value for (unnamed/positioned) param"
        end
      else
        value = parse_param_value( input )
        puts " param value >#{value}<"
      end

      [name, value]
    end

    # Collects the nodes of one param value until the next | or }}.
    #
    # Returns an array of nodes; exits with a syntax error if the input
    # ends before | or }} is seen.
    def parse_param_value( input )   ## todo: change to parse_param_value_nodes or such - why? why not??
      values = []   ## todo - change/rename to nodes??
      loop do
        values << parse_node( input )
        skip_whitespaces( input )

        break if input.check( /\|/ ) || input.check( /\}\}/ )

        if input.eos?
          syntax_error( "!! SYNTAX ERROR: unexpected end of string in param value; expected ending w/ | or }}" )
        end
      end

      values
    end

    # Parses one page link, e.g. [[Vienna]] or [[Vienna|Wien]].
    #
    # Returns Wikitree::Page.new( name, alt_name ); alt_name is nil when
    # the link has no | part and "" when the part after | is empty.
    # Exits with a syntax error if the page name or the closing ]] is missing.
    def parse_link( input )   ## todo/fix: change to parse_page - why? why not?
      input.scan( /\[\[/ )

      ## page name
      name = input.scan( /[^|\]]+/ )
      if name.nil?   ## e.g. [[]] or [[|x]]
        syntax_error( "!! SYNTAX ERROR: expected page name after [[:", input )
      end
      name = name.strip

      ## optional alternate/display name
      alt_name = nil
      if input.scan( /\|/ )   ## eat up |
        ## note: scan returns nil for an empty display name e.g. [[Vienna|]]
        alt_name = (input.scan( /[^\]]+/ ) || '').strip
      end

      ## eat up ]] - a link that is never closed is an error
      ##   (same as an unterminated template)
      unless input.scan( /\]\]/ )
        syntax_error( "!! SYNTAX ERROR: expected closing ]] in link:", input )
      end
      skip_whitespaces( input )

      if alt_name
        puts " @page<#{name} | #{alt_name}>"
      else
        puts " @page<#{name}>"
      end

      Wikitree::Page.new( name, alt_name )
    end

    # Parses the next node at the scanner position: a template ({{),
    # a page link ([[) or a run of raw text up to the next | { } [ ].
    #
    # Exits with a syntax error if none of the three matches.
    def parse_node( input )
      if input.check( TEMPLATE_BEGIN_RE )
        parse_template( input )
      elsif input.check( /\[\[/ )
        parse_link( input )
      elsif input.check( /[^|{}\[\]]+/ )   ## check for rawtext run for now
        run = input.scan( /[^|{}\[\]]+/ ).strip
        Wikitree::Text.new( run )
      else
        syntax_error( " !! SYNTAX ERROR: unknown content type:", input )
      end
    end

    # Strips html comments from text and parses the rest node by node.
    #
    # Returns the array of top-level nodes.
    def parse_lines( text )
      ## note: remove all html comments for now - why? why not?
      ##   <!-- Area rank should match .. -->
      ## note: use .*? (non-greedy match) so that the empty comment <!---->
      ##   gets removed too
      text = text.gsub( /<!--.*?-->/m ) do |m|
        puts " removing comment >#{m}<"
        ''
      end

      input = StringScanner.new( text )

      nodes = []
      loop do
        skip_whitespaces( input )
        break if input.eos?

        nodes << parse_node( input )
      end
      nodes
    end

    private

    # Prints message (plus up to 100 chars of upcoming input when a scanner
    # is passed) to stdout and terminates the process with exit status 1.
    #
    # NOTE(review): exiting from library code is kept for backward
    #   compatibility; consider raising an error class instead.
    def syntax_error( message, input = nil )
      puts message
      puts input.peek( 100 )  if input
      exit 1
    end
  end # class Parser

end # module Wikiscript
@@ -0,0 +1,17 @@
1
+
2
module Wikiscript
  module Module
    # Version and location metadata for the wikiscript-parser gem.
    module Parser
      # Frozen so the shared version string cannot be mutated in place.
      VERSION = '0.0.1'.freeze

      # Returns a one-line banner with the gem version, the running Ruby's
      # version, release date and platform, and the gem's root directory.
      def self.banner
        "wikiscript-parser/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
      end

      # Returns the absolute path obtained by taking the directory name of
      # this file three times (for lib/wikiscript-parser/version.rb that is
      # the directory containing lib/).
      def self.root
        File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
      end

    end # module Parser
  end # module Module
end # module Wikiscript
@@ -0,0 +1,5 @@
1
+ # note: allow require 'wikiscript/parser' too
2
+ # (in addition to require 'wikiscript-parser')
3
+
4
+ require_relative '../wikiscript-parser'
5
+
metadata ADDED
@@ -0,0 +1,106 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wikiscript-parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Gerald Bauer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-11-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: wikitree
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rdoc
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '4.0'
34
+ - - "<"
35
+ - !ruby/object:Gem::Version
36
+ version: '7'
37
+ type: :development
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '4.0'
44
+ - - "<"
45
+ - !ruby/object:Gem::Version
46
+ version: '7'
47
+ - !ruby/object:Gem::Dependency
48
+ name: hoe
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '3.22'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '3.22'
61
+ description: wikiscript-parser - wikitext/script parser (builds abstract syntax tree
62
+ 'n' more)
63
+ email: opensport@googlegroups.com
64
+ executables: []
65
+ extensions: []
66
+ extra_rdoc_files:
67
+ - CHANGELOG.md
68
+ - Manifest.txt
69
+ - README.md
70
+ files:
71
+ - CHANGELOG.md
72
+ - Manifest.txt
73
+ - README.md
74
+ - Rakefile
75
+ - lib/wikiscript-parser.rb
76
+ - lib/wikiscript-parser/parser.rb
77
+ - lib/wikiscript-parser/version.rb
78
+ - lib/wikiscript/parser.rb
79
+ homepage: https://github.com/wikiscript/wikiscript
80
+ licenses:
81
+ - Public Domain
82
+ metadata: {}
83
+ post_install_message:
84
+ rdoc_options:
85
+ - "--main"
86
+ - README.md
87
+ require_paths:
88
+ - lib
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: 2.2.2
94
+ required_rubygems_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ requirements: []
100
+ rubyforge_project:
101
+ rubygems_version: 2.5.2
102
+ signing_key:
103
+ specification_version: 4
104
+ summary: wikiscript-parser - wikitext/script parser (builds abstract syntax tree 'n'
105
+ more)
106
+ test_files: []