xmlscan 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,23 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # xmlscan/version.rb
4
+ #
5
+ # Copyright (C) UENO Katsuhiro 2002,2003
6
+ #
7
+ # $Id: version.rb,v 1.8.2.3 2003/05/01 15:50:00 katsu Exp $
8
+ #
9
+
10
module XMLScan

  # A version of the form 'X.X.0' (TEENY is 0) denotes an unstable
  # release. Incompatible changes may be applied to such a version
  # without special notice, and it should be distributed as a
  # snapshot only.
  #
  # A TEENY of 1 or greater (e.g. 'X.X.1' or 'X.X.2') denotes a
  # stable release.

  # Both strings are frozen so the shared constants cannot be mutated
  # in place by code that reads them.
  VERSION = '0.2.3'.freeze
  RELEASE_DATE = '2003-05-02'.freeze

end
@@ -0,0 +1,162 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # xmlscan/visitor.rb
4
+ #
5
+ # Copyright (C) Ueno Katsuhiro 2002
6
+ #
7
+ # $Id: visitor.rb,v 1.2 2003/01/13 04:07:25 katsu Exp $
8
+ #
9
+
10
+ require 'xmlscan/version'
11
+
12
+
13
module XMLScan

  # Base class of the errors defined in this file. Optionally records
  # the path and line number supplied by whoever raises it.
  class Error < StandardError

    # msg    - the error message.
    # path   - optional path to report in #to_s (default: nil).
    # lineno - optional line number to report in #to_s (default: nil).
    def initialize(msg, path = nil, lineno = nil)
      super(msg)
      @path = path
      @lineno = lineno
    end

    attr_reader :path, :lineno

    # Returns "path:lineno:message" when both path and lineno were
    # given; otherwise just the message.
    def to_s
      return super unless @lineno && @path
      "#{@path}:#{@lineno}:#{super}"
    end

  end

  class ParseError < Error ; end
  class NotWellFormedError < Error ; end
  class NotValidError < Error ; end


  # Mixin providing a default implementation of every callback.
  # The three *_error methods raise the corresponding exception; all
  # other callbacks are empty so that an including class only needs to
  # override the ones it is interested in.
  module Visitor

    # Raises ParseError with the given message.
    def parse_error(msg)
      raise ParseError, msg
    end

    # Raises NotWellFormedError with the given message.
    def wellformed_error(msg)
      raise NotWellFormedError, msg
    end

    # Raises NotValidError with the given message.
    def valid_error(msg)
      raise NotValidError, msg
    end

    # Warnings are ignored by default.
    def warning(msg)
    end

    def on_xmldecl
    end

    def on_xmldecl_key(key, str)
    end

    def on_xmldecl_version(str)
    end

    def on_xmldecl_encoding(str)
    end

    def on_xmldecl_standalone(str)
    end

    def on_xmldecl_other(name, value)
    end

    def on_xmldecl_end
    end

    def on_doctype(root, pubid, sysid)
    end

    def on_prolog_space(str)
    end

    def on_comment(str)
    end

    def on_pi(target, pi)
    end

    def on_chardata(str)
    end

    def on_cdata(str)
    end

    def on_etag(name)
    end

    def on_entityref(ref)
    end

    def on_charref(code)
    end

    def on_charref_hex(code)
    end

    def on_start_document
    end

    def on_end_document
    end

    def on_stag(name)
    end

    def on_attribute(name)
    end

    def on_attr_value(str)
    end

    def on_attr_entityref(ref)
    end

    def on_attr_charref(code)
    end

    def on_attr_charref_hex(code)
    end

    def on_attribute_end(name)
    end

    def on_stag_end_empty(name)
    end

    def on_stag_end(name)
    end

  end


  # Wraps another visitor and forwards every Visitor method to it.
  # Subclasses can override individual callbacks and call +super+ to
  # pass the event on to the wrapped visitor.
  class Decoration

    include Visitor

    # visitor - the object that receives the forwarded calls.
    def initialize(visitor)
      @visitor = visitor
    end

    # Generate one delegator per Visitor method. define_method avoids
    # evaluating generated source text, and public_send keeps the
    # explicit-receiver semantics of a plain `@visitor.name(...)` call.
    # A block given to the delegator is passed through as well.
    Visitor.instance_methods.each do |callback|
      define_method(callback) do |*args, &block|
        @visitor.public_send(callback, *args, &block)
      end
    end

  end

end
@@ -0,0 +1,248 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # xmlscan/xmlchar.rb
4
+ #
5
+ # Copyright (C) Ueno Katsuhiro 2002
6
+ #
7
+ # $Id: xmlchar.rb,v 1.5.2.2 2003/05/01 14:25:55 katsu Exp $
8
+ #
9
+
10
+ require 'xmlscan/scanner'
11
+
12
+
13
module XMLScan

  ENC_UTF8 = Encoding.find('UTF-8')

  # Character-level validity checks for XML text, names and
  # declaration values.
  module XMLChar

    # XML 1.0 Char production:
    #   #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
    # Spelled out as explicit ranges because a Unicode general-category
    # test (\P{C}) also rejects format, private-use and unassigned code
    # points, all of which XML allows.
    CharPattern = /\A[\t\n\r\u0020-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]*\z/u
    NotCharPattern = /[^\t\n\r\u0020-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/u

    # Name characters use the same set as the tail of NamePattern:
    # alphanumerics plus ':', '_', '.' and '-'.
    NmtokenPattern = /\A[\p{Alnum}:_.\-]+\z/u
    NotNameCharPattern = /[^\p{Alnum}:_.\-]/u

    NamePattern = /\A[\:\_\p{Letter}][\:\_\-\.\p{Alnum}]*\z/u

    # Returns true if the integer code point +code+ is a legal XML
    # character. Checked numerically so that negative values, surrogates
    # and values above 0x10ffff simply yield false instead of raising
    # while being packed into a string.
    def valid_char?(code)
      case code
      when 0x9, 0xA, 0xD,
           0x20..0xD7FF, 0xE000..0xFFFD, 0x10000..0x10FFFF
        true
      else
        false
      end
    end

    # Returns true if +str+ contains no character outside the XML Char
    # production (an empty string is therefore valid).
    def valid_chardata?(str)
      NotCharPattern !~ str
    end

    # Returns true if +str+ contains no character that is not a name
    # character. Note that an empty string passes this check.
    def valid_nmtoken?(str)
      NotNameCharPattern !~ str
    end

    # Returns true if the whole of +str+ matches NamePattern.
    def valid_name?(str)
      (NamePattern =~ str) ? true : false
    end

    module_function :valid_char?, :valid_chardata?
    module_function :valid_nmtoken?, :valid_name?


    # Returns true if +str+ consists only of characters permitted in a
    # public identifier: space, CR, LF, ASCII letters and digits, and
    # -'()+,./:=?;!*#@$_%
    def valid_pubid?(str)
      /[^\- \r\na-zA-Z0-9'()+,.\/:=?;!*#\@$_%]/u !~ str
    end
    # Expose XMLChar.valid_pubid? like the other validators. The
    # instance method is made public again afterwards because it was
    # public before this module-level copy was added.
    module_function :valid_pubid?
    public :valid_pubid?


    # Returns true if +str+ contains only ASCII letters, digits and
    # the characters '-', '_', '.' and ':'.
    def valid_version?(str)
      /[^\-a-zA-Z0-9_.:]/u !~ str
    end
    module_function :valid_version?


    # Returns true if +str+ starts with an ASCII letter followed only
    # by ASCII letters, digits, '-', '.' or '_'.
    def valid_encoding?(str)
      (/\A[A-Za-z]([\-A-Za-z0-9._])*\z/u =~ str) ? true : false
    end
    module_function :valid_encoding?

  end




  class XMLScanner

    # Mixin that validates the arguments of each callback with the
    # XMLChar predicates before passing the call on with +super+.
    # Failures are reported through parse_error / wellformed_error,
    # which must be provided by the object being extended.
    module StrictChar

      include XMLChar

      private

      def check_valid_name(name)
        parse_error "`#{name}' is not valid for XML name" unless valid_name?(name)
      end

      def check_valid_chardata(str)
        parse_error "invalid XML character is found" unless valid_chardata?(str)
      end

      # Unlike the other checks, an illegal code point is reported as a
      # well-formedness error.
      def check_valid_char(code)
        wellformed_error "#{code} is not a valid XML character" unless valid_char?(code)
      end

      def check_valid_version(str)
        parse_error "#{str} is not a valid XML version" unless valid_version?(str)
      end

      def check_valid_encoding(str)
        parse_error "#{str} is not a valid XML encoding name" unless valid_encoding?(str)
      end

      def check_valid_pubid(str)
        parse_error "#{str} is not a valid public ID" unless valid_pubid?(str)
      end


      def on_xmldecl_version(str)
        check_valid_version str
        super
      end

      def on_xmldecl_encoding(str)
        check_valid_encoding str
        super
      end

      def on_xmldecl_standalone(str)
        check_valid_chardata str
        super
      end

      # pubid and sysid are only checked when present.
      def on_doctype(root, pubid, sysid)
        check_valid_name root
        check_valid_pubid pubid if pubid
        check_valid_chardata sysid if sysid
        super
      end

      def on_comment(str)
        check_valid_chardata str
        super
      end

      def on_pi(target, pi)
        check_valid_name target
        check_valid_chardata pi
        super
      end

      def on_chardata(str)
        check_valid_chardata str
        super
      end

      def on_cdata(str)
        check_valid_chardata str
        super
      end

      def on_etag(name)
        check_valid_name name
        super
      end

      def on_entityref(ref)
        check_valid_name ref
        super
      end

      def on_charref(code)
        check_valid_char code
        super
      end

      def on_charref_hex(code)
        check_valid_char code
        super
      end

      def on_stag(name)
        check_valid_name name
        super
      end

      def on_attribute(name)
        check_valid_name name
        super
      end

      def on_attr_value(str)
        check_valid_chardata str
        super
      end

      def on_attr_entityref(ref)
        check_valid_name ref
        super
      end

      def on_attr_charref(code)
        check_valid_char code
        super
      end

      def on_attr_charref_hex(code)
        check_valid_char code
        super
      end

    end


    private

    # Extends this scanner instance with StrictChar.
    # NOTE(review): presumably invoked when the :strict_char option is
    # given to the scanner -- confirm against scanner.rb.
    def apply_option_strict_char
      extend StrictChar
    end

  end


end
224
+
225
+
226
+
227
+
228
+
229
+
230
# Stand-alone timing run: scans the input given on the command line
# (or stdin) with the :strict_char option and prints the user CPU time
# consumed. Errors are only printed when $VERBOSE is set.
if $0 == __FILE__ then
  class TestVisitor
    include XMLScan::Visitor
    # Report instead of raising so that scanning continues.
    def parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
    def wellformed_error(msg)
      STDERR.printf("%s:%d: WFC: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
  end

  # The scanner is kept in a global because the visitor has to be
  # constructed before the scanner it reports positions from.
  $s = scan = XMLScan::XMLScanner.new(TestVisitor.new, :strict_char)
  src = ARGF
  def src.path; filename; end
  # Process.times replaces Time.times, which is not available on the
  # Ruby versions this file otherwise requires (it uses Encoding).
  t1 = Process.times.utime
  scan.parse src
  t2 = Process.times.utime
  STDERR.printf "%2.3f sec\n", t2 - t1
end
data/test.rb ADDED
@@ -0,0 +1,7 @@
1
#!/usr/bin/ruby
# Test runner: enables warnings, puts 'tests' and 'lib' at the front of
# the load path (in that order), switches to the directory of this
# script so those relative entries resolve, then loads the test suite.
$VERBOSE = true
$LOAD_PATH.unshift('tests', 'lib')
Dir.chdir(File.dirname($0))
require 'testall'
load 'runtest.rb'
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xmlscan
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.3
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - UENO Katsuhiro <katsu@blue.sky.or.jp>
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-13 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &8077620 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 2.8.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *8077620
25
+ - !ruby/object:Gem::Dependency
26
+ name: rdoc
27
+ requirement: &8076660 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: '3.12'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *8076660
36
+ - !ruby/object:Gem::Dependency
37
+ name: bundler
38
+ requirement: &8075620 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: 1.0.0
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *8075620
47
+ - !ruby/object:Gem::Dependency
48
+ name: jeweler
49
+ requirement: &8074720 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.8.3
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *8074720
58
+ description: The fastest XML parser written in 100% pure Ruby.
59
+ email: gerryg@inbox.com
60
+ executables: []
61
+ extensions: []
62
+ extra_rdoc_files:
63
+ - README.rdoc
64
+ files:
65
+ - ChangeLog
66
+ - Gemfile
67
+ - Gemfile.lock
68
+ - README.rdoc
69
+ - Rakefile
70
+ - THANKS
71
+ - VERSION
72
+ - install.rb
73
+ - lib/xmlscan/htmlscan.rb
74
+ - lib/xmlscan/namespace.rb
75
+ - lib/xmlscan/parser.rb
76
+ - lib/xmlscan/scanner.rb
77
+ - lib/xmlscan/version.rb
78
+ - lib/xmlscan/visitor.rb
79
+ - lib/xmlscan/xmlchar.rb
80
+ - test.rb
81
+ homepage: http://github.com/GerryG/xmlformat/
82
+ licenses:
83
+ - MIT
84
+ post_install_message:
85
+ rdoc_options:
86
+ - --main
87
+ - README.rdoc
88
+ - --inline-source
89
+ - --line-numbers
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ! '>='
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ segments:
99
+ - 0
100
+ hash: 3268123461909302440
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ none: false
103
+ requirements:
104
+ - - ! '>='
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ requirements: []
108
+ rubyforge_project:
109
+ rubygems_version: 1.8.15
110
+ signing_key:
111
+ specification_version: 3
112
+ summary: The fastest XML parser written in 100% pure Ruby.
113
+ test_files: []