xmlscan 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # xmlscan/version.rb
4
+ #
5
+ # Copyright (C) UENO Katsuhiro 2002,2003
6
+ #
7
+ # $Id: version.rb,v 1.8.2.3 2003/05/01 15:50:00 katsu Exp $
8
+ #
9
+
10
module XMLScan

  # A version of the form 'X.X.0' (TENNY is 0) marks an unstable release.
  # Incompatible changes may be applied to such a version without special
  # notice, and it should be distributed as a snapshot only.
  #
  # A TENNY of 1 or larger (e.g. 'X.X.1' or 'X.X.2') marks a stable
  # release.

  # Version string of this release.  Frozen so that the shared constant
  # cannot be modified in place.
  VERSION = '0.2.3'.freeze

  # Release date string of this version.
  RELEASE_DATE = '2003-05-02'.freeze

end
@@ -0,0 +1,162 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # xmlscan/visitor.rb
4
+ #
5
+ # Copyright (C) Ueno Katsuhiro 2002
6
+ #
7
+ # $Id: visitor.rb,v 1.2 2003/01/13 04:07:25 katsu Exp $
8
+ #
9
+
10
+ require 'xmlscan/version'
11
+
12
+
13
module XMLScan

  # Base class of the errors defined by xmlscan.  An error may carry the
  # path and the line number of the input it refers to.
  class Error < StandardError

    # msg::    the error message.
    # path::   path of the input the error refers to (optional).
    # lineno:: line number within that input (optional).
    def initialize(msg, path = nil, lineno = nil)
      super(msg)
      @path = path
      @lineno = lineno
    end

    attr_reader :path, :lineno

    # Returns the message, prefixed with "path:lineno:" when both the path
    # and the line number are set; otherwise returns the bare message.
    def to_s
      return super unless @lineno && @path
      "#{@path}:#{@lineno}:#{super}"
    end

  end

  class ParseError < Error; end
  class NotWellFormedError < Error; end
  class NotValidError < Error; end


  # Mix-in that supplies a default implementation of every handler method.
  # The error handlers raise; every other handler does nothing, so an
  # including class only needs to override the handlers it cares about.
  module Visitor

    # Raises ParseError with +msg+.
    def parse_error(msg)
      raise ParseError, msg
    end

    # Raises NotWellFormedError with +msg+.
    def wellformed_error(msg)
      raise NotWellFormedError, msg
    end

    # Raises NotValidError with +msg+.
    def valid_error(msg)
      raise NotValidError, msg
    end

    # Warnings are ignored by default.
    def warning(msg)
    end

    # -- XML declaration handlers (no-ops by default) --

    def on_xmldecl
    end

    def on_xmldecl_key(key, str)
    end

    def on_xmldecl_version(str)
    end

    def on_xmldecl_encoding(str)
    end

    def on_xmldecl_standalone(str)
    end

    def on_xmldecl_other(name, value)
    end

    def on_xmldecl_end
    end

    # -- Prolog and content handlers (no-ops by default) --

    def on_doctype(root, pubid, sysid)
    end

    def on_prolog_space(str)
    end

    def on_comment(str)
    end

    def on_pi(target, pi)
    end

    def on_chardata(str)
    end

    def on_cdata(str)
    end

    def on_etag(name)
    end

    def on_entityref(ref)
    end

    def on_charref(code)
    end

    def on_charref_hex(code)
    end

    def on_start_document
    end

    def on_end_document
    end

    # -- Start tag and attribute handlers (no-ops by default) --

    def on_stag(name)
    end

    def on_attribute(name)
    end

    def on_attr_value(str)
    end

    def on_attr_entityref(ref)
    end

    def on_attr_charref(code)
    end

    def on_attr_charref_hex(code)
    end

    def on_attribute_end(name)
    end

    def on_stag_end_empty(name)
    end

    def on_stag_end(name)
    end

  end


  # Wraps another visitor and forwards every Visitor method to it.
  # Subclasses override individual handlers to decorate the wrapped
  # visitor's behaviour.
  class Decoration

    include Visitor

    # visitor:: the object that receives the forwarded calls.
    def initialize(visitor)
      @visitor = visitor
    end

    # Generate one delegator per Visitor method.  The methods are built from
    # source text (rather than define_method) so that each one is a plain
    # method call; the names come from Visitor itself, never from input.
    # A block given to a delegator is passed on to the wrapped visitor.
    Visitor.instance_methods.each { |i|
      module_eval <<-END, __FILE__, __LINE__ + 1
        def #{i}(*args, &block)
          @visitor.#{i}(*args, &block)
        end
      END
    }

  end

end
@@ -0,0 +1,248 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # xmlscan/xmlchar.rb
4
+ #
5
+ # Copyright (C) Ueno Katsuhiro 2002
6
+ #
7
+ # $Id: xmlchar.rb,v 1.5.2.2 2003/05/01 14:25:55 katsu Exp $
8
+ #
9
+
10
+ require 'xmlscan/scanner'
11
+
12
+
13
module XMLScan

  # The UTF-8 encoding object.
  ENC_UTF8 = Encoding.find('UTF-8')

  # Predicates that check code points and strings against the lexical rules
  # of XML: characters, names, name tokens, public identifiers, version
  # numbers and encoding names.
  module XMLChar

    # Matches a whole string consisting only of characters allowed by the
    # XML 1.0 "Char" production: tab, line feed, carriage return,
    # U+0020..U+D7FF, U+E000..U+FFFD and U+10000..U+10FFFF.
    CharPattern = /\A[\t\n\r\u0020-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]*\z/u
    # Matches a single character that is outside the "Char" production.
    NotCharPattern = /[^\t\n\r\u0020-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/u

    # Matches a whole, non-empty string of name characters.  The character
    # set is the one NamePattern accepts after the first character.
    NmtokenPattern = /\A[:_\-.\p{Alnum}]+\z/u
    # Matches a single character that is not a name character.
    NotNameCharPattern = /[^:_\-.\p{Alnum}]/u

    # Matches a whole name: a letter, ':' or '_' followed by name characters.
    NamePattern = /\A[\:\_\p{Letter}][\:\_\-\.\p{Alnum}]*\z/u

    # Returns true when the integer code point +code+ is an XML character.
    # Negative values, values above U+10FFFF and surrogates are rejected
    # up front: they cannot be turned into a valid UTF-8 string, so matching
    # them against the pattern would raise instead of answering false.
    def valid_char?(code)
      return false if code < 0 || code > 0x10ffff
      return false if code >= 0xd800 && code <= 0xdfff
      NotCharPattern !~ [code].pack('U')
    end

    # Returns true when +str+ contains no character that XML forbids.
    def valid_chardata?(str)
      NotCharPattern !~ str
    end

    # Returns true when +str+ contains no character that is not a name
    # character.
    def valid_nmtoken?(str)
      NotNameCharPattern !~ str
    end

    # Returns true when +str+ as a whole matches NamePattern.
    def valid_name?(str)
      !(NamePattern !~ str)
    end

    module_function :valid_char?, :valid_chardata?
    module_function :valid_nmtoken?, :valid_name?


    # Returns true when +str+ contains only characters allowed in a public
    # identifier.
    def valid_pubid?(str)
      /[^\- \r\na-zA-Z0-9'()+,.\/:=?;!*#\@$_%]/u !~ str
    end
    # Also callable as XMLChar.valid_pubid?, like the other predicates.
    # module_function makes the instance method private, so it is made
    # public again to keep existing external callers working.
    module_function :valid_pubid?
    public :valid_pubid?


    # Returns true when +str+ contains only characters allowed in an XML
    # version number.
    def valid_version?(str)
      /[^\-a-zA-Z0-9_.:]/u !~ str
    end
    module_function :valid_version?


    # Returns true when +str+ is a well-formed encoding name: an ASCII
    # letter followed by ASCII letters, digits, '.', '_' or '-'.
    def valid_encoding?(str)
      /\A[A-Za-z]([\-A-Za-z0-9._])*\z/u =~ str ? true : false
    end
    module_function :valid_encoding?

  end




  class XMLScanner

    # Handler overrides that validate names and character data before
    # passing each event on to the next implementation with +super+.
    # Problems are reported through parse_error / wellformed_error, which
    # the object extended with this module is expected to provide.
    module StrictChar

      include XMLChar

      private

      # -- validation helpers --

      def check_valid_name(name)
        parse_error "`#{name}' is not valid for XML name" unless valid_name?(name)
      end

      def check_valid_chardata(str)
        parse_error "invalid XML character is found" unless valid_chardata?(str)
      end

      def check_valid_char(code)
        wellformed_error "#{code} is not a valid XML character" unless valid_char?(code)
      end

      def check_valid_version(str)
        parse_error "#{str} is not a valid XML version" unless valid_version?(str)
      end

      def check_valid_encoding(str)
        parse_error "#{str} is not a valid XML encoding name" unless valid_encoding?(str)
      end

      def check_valid_pubid(str)
        parse_error "#{str} is not a valid public ID" unless valid_pubid?(str)
      end


      # -- handlers: validate the arguments, then delegate with super --

      def on_xmldecl_version(str)
        check_valid_version str
        super
      end

      def on_xmldecl_encoding(str)
        check_valid_encoding str
        super
      end

      def on_xmldecl_standalone(str)
        check_valid_chardata str
        super
      end

      # pubid and sysid are checked only when they are given.
      def on_doctype(root, pubid, sysid)
        check_valid_name root
        check_valid_pubid pubid if pubid
        check_valid_chardata sysid if sysid
        super
      end

      def on_comment(str)
        check_valid_chardata str
        super
      end

      def on_pi(target, pi)
        check_valid_name target
        check_valid_chardata pi
        super
      end

      def on_chardata(str)
        check_valid_chardata str
        super
      end

      def on_cdata(str)
        check_valid_chardata str
        super
      end

      def on_etag(name)
        check_valid_name name
        super
      end

      def on_entityref(ref)
        check_valid_name ref
        super
      end

      def on_charref(code)
        check_valid_char code
        super
      end

      def on_charref_hex(code)
        check_valid_char code
        super
      end

      def on_stag(name)
        check_valid_name name
        super
      end

      def on_attribute(name)
        check_valid_name name
        super
      end

      def on_attr_value(str)
        check_valid_chardata str
        super
      end

      def on_attr_entityref(ref)
        check_valid_name ref
        super
      end

      def on_attr_charref(code)
        check_valid_char code
        super
      end

      def on_attr_charref_hex(code)
        check_valid_char code
        super
      end

    end


    private

    # Turns on strict character checking for this scanner instance by
    # extending it with StrictChar.
    def apply_option_strict_char
      extend StrictChar
    end

  end


end
224
+
225
+
226
+
227
+
228
+
229
+
230
# Ad-hoc timing driver, run only when this file is executed directly.  It
# scans the input read through ARGF with the :strict_char option and prints
# the user CPU time spent to standard error.
if $0 == __FILE__
  # Visitor that reports parse and well-formedness errors on standard error
  # instead of raising them.  Nothing is printed unless $VERBOSE is set.
  class TestVisitor
    include XMLScan::Visitor

    def parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end

    def wellformed_error(msg)
      STDERR.printf("%s:%d: WFC: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
  end

  # The scanner is also stored in the global $s so that TestVisitor can read
  # its path and line number when reporting an error.
  $s = scan = XMLScan::XMLScanner.new(TestVisitor.new, :strict_char)
  src = ARGF
  # Give ARGF a #path method that answers its current file name.
  def src.path; filename; end
  # Time.times no longer exists as of Ruby 1.9; Process.times is its
  # replacement and returns the same kind of timing structure.
  t1 = Process.times.utime
  scan.parse src
  t2 = Process.times.utime
  STDERR.printf "%2.3f sec\n", t2 - t1
end
data/test.rb ADDED
@@ -0,0 +1,7 @@
1
#!/usr/bin/ruby
# Test launcher: enables warnings, puts 'tests' and 'lib' at the front of the
# load path, changes to the directory containing this script, then requires
# 'testall' and loads 'runtest.rb'.
$VERBOSE = true
$LOAD_PATH.unshift('tests', 'lib')
Dir.chdir(File.dirname($0))
require 'testall'
load 'runtest.rb'
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xmlscan
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.3
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - UENO Katsuhiro <katsu@blue.sky.or.jp>
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-13 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &8077620 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 2.8.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *8077620
25
+ - !ruby/object:Gem::Dependency
26
+ name: rdoc
27
+ requirement: &8076660 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: '3.12'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *8076660
36
+ - !ruby/object:Gem::Dependency
37
+ name: bundler
38
+ requirement: &8075620 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: 1.0.0
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *8075620
47
+ - !ruby/object:Gem::Dependency
48
+ name: jeweler
49
+ requirement: &8074720 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.8.3
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *8074720
58
+ description: The fastest XML parser written in 100% pure Ruby.
59
+ email: gerryg@inbox.com
60
+ executables: []
61
+ extensions: []
62
+ extra_rdoc_files:
63
+ - README.rdoc
64
+ files:
65
+ - ChangeLog
66
+ - Gemfile
67
+ - Gemfile.lock
68
+ - README.rdoc
69
+ - Rakefile
70
+ - THANKS
71
+ - VERSION
72
+ - install.rb
73
+ - lib/xmlscan/htmlscan.rb
74
+ - lib/xmlscan/namespace.rb
75
+ - lib/xmlscan/parser.rb
76
+ - lib/xmlscan/scanner.rb
77
+ - lib/xmlscan/version.rb
78
+ - lib/xmlscan/visitor.rb
79
+ - lib/xmlscan/xmlchar.rb
80
+ - test.rb
81
+ homepage: http://github.com/GerryG/xmlformat/
82
+ licenses:
83
+ - MIT
84
+ post_install_message:
85
+ rdoc_options:
86
+ - --main
87
+ - README.rdoc
88
+ - --inline-source
89
+ - --line-numbers
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ! '>='
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ segments:
99
+ - 0
100
+ hash: 3268123461909302440
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ none: false
103
+ requirements:
104
+ - - ! '>='
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ requirements: []
108
+ rubyforge_project:
109
+ rubygems_version: 1.8.15
110
+ signing_key:
111
+ specification_version: 3
112
+ summary: The fastest XML parser written in 100% pure Ruby.
113
+ test_files: []