rubysl-rexml 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.travis.yml +8 -0
- data/Gemfile +4 -0
- data/LICENSE +25 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/lib/rexml/attlistdecl.rb +62 -0
- data/lib/rexml/attribute.rb +185 -0
- data/lib/rexml/cdata.rb +67 -0
- data/lib/rexml/child.rb +96 -0
- data/lib/rexml/comment.rb +80 -0
- data/lib/rexml/doctype.rb +271 -0
- data/lib/rexml/document.rb +230 -0
- data/lib/rexml/dtd/attlistdecl.rb +10 -0
- data/lib/rexml/dtd/dtd.rb +51 -0
- data/lib/rexml/dtd/elementdecl.rb +17 -0
- data/lib/rexml/dtd/entitydecl.rb +56 -0
- data/lib/rexml/dtd/notationdecl.rb +39 -0
- data/lib/rexml/element.rb +1227 -0
- data/lib/rexml/encoding.rb +71 -0
- data/lib/rexml/encodings/CP-1252.rb +103 -0
- data/lib/rexml/encodings/EUC-JP.rb +35 -0
- data/lib/rexml/encodings/ICONV.rb +22 -0
- data/lib/rexml/encodings/ISO-8859-1.rb +7 -0
- data/lib/rexml/encodings/ISO-8859-15.rb +72 -0
- data/lib/rexml/encodings/SHIFT-JIS.rb +37 -0
- data/lib/rexml/encodings/SHIFT_JIS.rb +1 -0
- data/lib/rexml/encodings/UNILE.rb +34 -0
- data/lib/rexml/encodings/US-ASCII.rb +30 -0
- data/lib/rexml/encodings/UTF-16.rb +35 -0
- data/lib/rexml/encodings/UTF-8.rb +18 -0
- data/lib/rexml/entity.rb +166 -0
- data/lib/rexml/formatters/default.rb +109 -0
- data/lib/rexml/formatters/pretty.rb +138 -0
- data/lib/rexml/formatters/transitive.rb +56 -0
- data/lib/rexml/functions.rb +382 -0
- data/lib/rexml/instruction.rb +70 -0
- data/lib/rexml/light/node.rb +196 -0
- data/lib/rexml/namespace.rb +47 -0
- data/lib/rexml/node.rb +75 -0
- data/lib/rexml/output.rb +24 -0
- data/lib/rexml/parent.rb +166 -0
- data/lib/rexml/parseexception.rb +51 -0
- data/lib/rexml/parsers/baseparser.rb +503 -0
- data/lib/rexml/parsers/lightparser.rb +60 -0
- data/lib/rexml/parsers/pullparser.rb +196 -0
- data/lib/rexml/parsers/sax2parser.rb +238 -0
- data/lib/rexml/parsers/streamparser.rb +46 -0
- data/lib/rexml/parsers/treeparser.rb +97 -0
- data/lib/rexml/parsers/ultralightparser.rb +56 -0
- data/lib/rexml/parsers/xpathparser.rb +698 -0
- data/lib/rexml/quickpath.rb +266 -0
- data/lib/rexml/rexml.rb +32 -0
- data/lib/rexml/sax2listener.rb +97 -0
- data/lib/rexml/source.rb +251 -0
- data/lib/rexml/streamlistener.rb +92 -0
- data/lib/rexml/syncenumerator.rb +33 -0
- data/lib/rexml/text.rb +344 -0
- data/lib/rexml/undefinednamespaceexception.rb +8 -0
- data/lib/rexml/validation/relaxng.rb +559 -0
- data/lib/rexml/validation/validation.rb +155 -0
- data/lib/rexml/validation/validationexception.rb +9 -0
- data/lib/rexml/xmldecl.rb +119 -0
- data/lib/rexml/xmltokens.rb +18 -0
- data/lib/rexml/xpath.rb +66 -0
- data/lib/rexml/xpath_parser.rb +792 -0
- data/lib/rubysl/rexml.rb +1 -0
- data/lib/rubysl/rexml/version.rb +5 -0
- data/rubysl-rexml.gemspec +23 -0
- data/spec/attribute/clone_spec.rb +10 -0
- data/spec/attribute/element_spec.rb +22 -0
- data/spec/attribute/equal_value_spec.rb +17 -0
- data/spec/attribute/hash_spec.rb +12 -0
- data/spec/attribute/initialize_spec.rb +28 -0
- data/spec/attribute/inspect_spec.rb +19 -0
- data/spec/attribute/namespace_spec.rb +23 -0
- data/spec/attribute/node_type_spec.rb +9 -0
- data/spec/attribute/prefix_spec.rb +17 -0
- data/spec/attribute/remove_spec.rb +19 -0
- data/spec/attribute/to_s_spec.rb +13 -0
- data/spec/attribute/to_string_spec.rb +14 -0
- data/spec/attribute/value_spec.rb +14 -0
- data/spec/attribute/write_spec.rb +22 -0
- data/spec/attribute/xpath_spec.rb +19 -0
- data/spec/attributes/add_spec.rb +6 -0
- data/spec/attributes/append_spec.rb +6 -0
- data/spec/attributes/delete_all_spec.rb +30 -0
- data/spec/attributes/delete_spec.rb +26 -0
- data/spec/attributes/each_attribute_spec.rb +24 -0
- data/spec/attributes/each_spec.rb +24 -0
- data/spec/attributes/element_reference_spec.rb +18 -0
- data/spec/attributes/element_set_spec.rb +25 -0
- data/spec/attributes/get_attribute_ns_spec.rb +13 -0
- data/spec/attributes/get_attribute_spec.rb +28 -0
- data/spec/attributes/initialize_spec.rb +18 -0
- data/spec/attributes/length_spec.rb +6 -0
- data/spec/attributes/namespaces_spec.rb +5 -0
- data/spec/attributes/prefixes_spec.rb +23 -0
- data/spec/attributes/shared/add.rb +17 -0
- data/spec/attributes/shared/length.rb +12 -0
- data/spec/attributes/size_spec.rb +6 -0
- data/spec/attributes/to_a_spec.rb +20 -0
- data/spec/cdata/clone_spec.rb +9 -0
- data/spec/cdata/initialize_spec.rb +24 -0
- data/spec/cdata/shared/to_s.rb +11 -0
- data/spec/cdata/to_s_spec.rb +6 -0
- data/spec/cdata/value_spec.rb +6 -0
- data/spec/document/add_element_spec.rb +30 -0
- data/spec/document/add_spec.rb +60 -0
- data/spec/document/clone_spec.rb +19 -0
- data/spec/document/doctype_spec.rb +14 -0
- data/spec/document/encoding_spec.rb +21 -0
- data/spec/document/expanded_name_spec.rb +15 -0
- data/spec/document/new_spec.rb +37 -0
- data/spec/document/node_type_spec.rb +7 -0
- data/spec/document/root_spec.rb +11 -0
- data/spec/document/stand_alone_spec.rb +18 -0
- data/spec/document/version_spec.rb +13 -0
- data/spec/document/write_spec.rb +38 -0
- data/spec/document/xml_decl_spec.rb +14 -0
- data/spec/element/add_attribute_spec.rb +40 -0
- data/spec/element/add_attributes_spec.rb +21 -0
- data/spec/element/add_element_spec.rb +38 -0
- data/spec/element/add_namespace_spec.rb +23 -0
- data/spec/element/add_text_spec.rb +23 -0
- data/spec/element/attribute_spec.rb +16 -0
- data/spec/element/attributes_spec.rb +18 -0
- data/spec/element/cdatas_spec.rb +23 -0
- data/spec/element/clone_spec.rb +28 -0
- data/spec/element/comments_spec.rb +20 -0
- data/spec/element/delete_attribute_spec.rb +38 -0
- data/spec/element/delete_element_spec.rb +50 -0
- data/spec/element/delete_namespace_spec.rb +24 -0
- data/spec/element/document_spec.rb +17 -0
- data/spec/element/each_element_with_attribute_spec.rb +34 -0
- data/spec/element/each_element_with_text_spec.rb +30 -0
- data/spec/element/get_text_spec.rb +17 -0
- data/spec/element/has_attributes_spec.rb +16 -0
- data/spec/element/has_elements_spec.rb +17 -0
- data/spec/element/has_text_spec.rb +15 -0
- data/spec/element/inspect_spec.rb +26 -0
- data/spec/element/instructions_spec.rb +20 -0
- data/spec/element/namespace_spec.rb +26 -0
- data/spec/element/namespaces_spec.rb +31 -0
- data/spec/element/new_spec.rb +34 -0
- data/spec/element/next_element_spec.rb +18 -0
- data/spec/element/node_type_spec.rb +7 -0
- data/spec/element/prefixes_spec.rb +22 -0
- data/spec/element/previous_element_spec.rb +19 -0
- data/spec/element/raw_spec.rb +23 -0
- data/spec/element/root_spec.rb +27 -0
- data/spec/element/text_spec.rb +45 -0
- data/spec/element/texts_spec.rb +15 -0
- data/spec/element/whitespace_spec.rb +22 -0
- data/spec/node/each_recursive_spec.rb +20 -0
- data/spec/node/find_first_recursive_spec.rb +24 -0
- data/spec/node/index_in_parent_spec.rb +14 -0
- data/spec/node/next_sibling_node_spec.rb +20 -0
- data/spec/node/parent_spec.rb +20 -0
- data/spec/node/previous_sibling_node_spec.rb +20 -0
- data/spec/shared/each_element.rb +35 -0
- data/spec/shared/elements_to_a.rb +35 -0
- data/spec/text/append_spec.rb +9 -0
- data/spec/text/clone_spec.rb +9 -0
- data/spec/text/comparison_spec.rb +24 -0
- data/spec/text/empty_spec.rb +11 -0
- data/spec/text/indent_text_spec.rb +23 -0
- data/spec/text/inspect_spec.rb +7 -0
- data/spec/text/new_spec.rb +48 -0
- data/spec/text/node_type_spec.rb +7 -0
- data/spec/text/normalize_spec.rb +7 -0
- data/spec/text/read_with_substitution_spec.rb +12 -0
- data/spec/text/to_s_spec.rb +17 -0
- data/spec/text/unnormalize_spec.rb +7 -0
- data/spec/text/value_spec.rb +36 -0
- data/spec/text/wrap_spec.rb +20 -0
- data/spec/text/write_with_substitution_spec.rb +32 -0
- metadata +385 -0
@@ -0,0 +1,71 @@
|
|
1
|
+
# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
|
2
|
+
module REXML
  # Pluggable character-encoding support.
  #
  # The module keeps a registry of "installer" blocks keyed by encoding name.
  # An installer receives an object and is expected to give it +encode+ and
  # +decode+ methods for that encoding. The instance methods below
  # (+encoding+, +encoding=+, +check_encoding+) operate on the object the
  # module is mixed into.
  module Encoding
    # Encoding name => installer block, filled in through Encoding.register.
    @encoding_methods = {}

    # Store +block+ as the installer for the encoding named +enc+.
    def self.register(enc, &block)
      @encoding_methods[enc] = block
    end

    # Run the installer registered for +enc+ against +obj+.
    # Fails with NoMethodError if nothing is registered under +enc+.
    def self.apply(obj, enc)
      @encoding_methods[enc].call(obj)
    end

    # Return the installer block registered for +enc+, or nil.
    def self.encoding_method(enc)
      @encoding_methods[enc]
    end

    # Native, default format is UTF-8, so it is declared here rather than in
    # an encodings/ definition.
    UTF_8 = 'UTF-8'
    UTF_16 = 'UTF-16'
    UNILE = 'UNILE'

    # ID ---> Encoding name
    attr_reader :encoding

    # Switch this object to the encoding named +enc+ (case-insensitive; nil
    # means UTF-8) and install the matching encode/decode methods.
    #
    # Returns false when +enc+ is already the current encoding, true after a
    # successful switch. Raises ArgumentError when the name contains anything
    # but word characters and hyphens, or when no decoder can be loaded.
    def encoding=(enc)
      old_verbosity = $VERBOSE
      begin
        # Silence warnings while encoding files are (re)loaded.
        $VERBOSE = false
        enc = enc.upcase unless enc.nil?
        return false if defined?(@encoding) && enc == @encoding
        if enc && enc != UTF_8
          # Validate BEFORE storing: the name is interpolated into a require
          # path below. \A and \z anchor the whole string; ^ and $ would accept
          # any name that merely contains one well-formed line.
          unless enc =~ /\A[\w-]+\z/
            raise ArgumentError, "Bad encoding name #{enc}"
          end
          @encoding = enc
          # untaint does not exist on newer Rubies.
          @encoding.untaint if @encoding.respond_to?(:untaint)
          begin
            require 'rexml/encodings/ICONV.rb'
            Encoding.apply(self, "ICONV")
          rescue ScriptError, StandardError
            # Any load or conversion failure falls back to a dedicated
            # encodings/ file. Interrupt, SystemExit and NoMemoryError are
            # deliberately not swallowed.
            begin
              enc_file = File.join("rexml", "encodings", "#@encoding.rb")
              require enc_file
              Encoding.apply(self, @encoding)
            rescue LoadError => err
              # Diagnostics go to stderr, never to the program's stdout.
              warn err.message
              raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
            end
          end
        else
          @encoding = UTF_8
          require 'rexml/encodings/UTF-8.rb'
          Encoding.apply(self, @encoding)
        end
      ensure
        $VERBOSE = old_verbosity
      end
      true
    end

    # Guess the encoding of the document text +str+.
    #
    # A leading FE FF byte pair yields UTF_16 and a leading FF FE pair yields
    # UNILE; in both cases those two bytes are removed from +str+ in place.
    # Otherwise the upcased value of an XML declaration's encoding attribute
    # is returned, or UTF_8 when there is none.
    def check_encoding(str)
      # We have to recognize UTF-16, LSB UTF-16, and UTF-8.
      # unpack yields Integers whether String#[] returns an Integer or a String.
      bom = str.unpack('C2')
      if bom == [0xfe, 0xff]
        str[0, 2] = ""
        return UTF_16
      elsif bom == [0xff, 0xfe]
        str[0, 2] = ""
        return UNILE
      end
      str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/um
      return $3.upcase if $3
      UTF_8
    end
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
#
|
2
|
+
# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
|
3
|
+
#
|
4
|
+
module REXML
|
5
|
+
module Encoding
|
6
|
+
# Installer for "CP-1252": points the target object's own encode/decode
# at the CP-1252 converters (on its singleton class only).
register("CP-1252") do |target|
  class << target
    alias_method :encode, :encode_cp1252
    alias_method :decode, :decode_cp1252
  end
end
|
12
|
+
|
13
|
+
# Convert from UTF-8 to CP-1252.
#
# +content+ is unpacked as UTF-8 code points. Code points up to 0xFF are
# emitted as the byte of the same value, the characters CP-1252 places in
# 0x80-0x9F are mapped to their byte, and everything else is written as a
# numeric character reference (&#nnnn;). Returns the packed byte string.
def encode_cp1252(content)
  array_enc = []
  content.unpack('U*').each do |num|
    case num
    # shortcut first bunch basic characters
    when 0..0xFF then array_enc << num
    # characters added compared to iso-8859-1
    when 0x20AC then array_enc << 0x80 # euro sign
    when 0x201A then array_enc << 0x82 # single low-9 quotation mark
    when 0x0192 then array_enc << 0x83 # small f with hook
    when 0x201E then array_enc << 0x84 # double low-9 quotation mark
    when 0x2026 then array_enc << 0x85 # horizontal ellipsis
    when 0x2020 then array_enc << 0x86 # dagger
    when 0x2021 then array_enc << 0x87 # double dagger
    when 0x02C6 then array_enc << 0x88 # modifier circumflex
    when 0x2030 then array_enc << 0x89 # per mille sign
    when 0x0160 then array_enc << 0x8A # capital S with caron
    when 0x2039 then array_enc << 0x8B # single left angle quotation mark
    when 0x0152 then array_enc << 0x8C # capital ligature OE
    when 0x017D then array_enc << 0x8E # capital Z with caron
    when 0x2018 then array_enc << 0x91 # left single quotation mark
    when 0x2019 then array_enc << 0x92 # right single quotation mark
    when 0x201C then array_enc << 0x93 # left double quotation mark
    when 0x201D then array_enc << 0x94 # right double quotation mark
    when 0x2022 then array_enc << 0x95 # bullet
    when 0x2013 then array_enc << 0x96 # en dash
    when 0x2014 then array_enc << 0x97 # em dash
    when 0x02DC then array_enc << 0x98 # small tilde
    when 0x2122 then array_enc << 0x99 # trade mark sign
    when 0x0161 then array_enc << 0x9A # small s with caron
    when 0x203A then array_enc << 0x9B # single right angle quotation mark
    # Was a second 0x0152 here, which could never match and left U+0153
    # to be written as an entity.
    when 0x0153 then array_enc << 0x9C # small ligature oe
    when 0x017E then array_enc << 0x9E # small z with caron
    when 0x0178 then array_enc << 0x9F # capital Y with diaeresis
    else
      # Numeric entity (&#nnnn;); shared by Stefan Scholl
      array_enc.concat "&\##{num};".unpack('C*')
    end
  end
  array_enc.pack('C*')
end
|
61
|
+
|
62
|
+
# Convert from CP-1252 to UTF-8.
#
# Each byte of +str+ is mapped to a code point: the bytes CP-1252 assigns in
# 0x80-0x9F go to their Unicode character, every other byte keeps its own
# value. Returns the code points packed as UTF-8.
def decode_cp1252(str)
  code_points = str.unpack('C*').map do |num|
    case num
    # characters added compared to iso-8859-1
    when 0x80 then 0x20AC # euro sign
    when 0x82 then 0x201A # single low-9 quotation mark
    when 0x83 then 0x0192 # small f with hook
    when 0x84 then 0x201E # double low-9 quotation mark
    when 0x85 then 0x2026 # horizontal ellipsis
    when 0x86 then 0x2020 # dagger
    when 0x87 then 0x2021 # double dagger
    when 0x88 then 0x02C6 # modifier circumflex
    when 0x89 then 0x2030 # per mille sign
    when 0x8A then 0x0160 # capital S with caron
    when 0x8B then 0x2039 # single left angle quotation mark
    when 0x8C then 0x0152 # capital ligature OE
    when 0x8E then 0x017D # capital Z with caron
    when 0x91 then 0x2018 # left single quotation mark
    when 0x92 then 0x2019 # right single quotation mark
    when 0x93 then 0x201C # left double quotation mark
    when 0x94 then 0x201D # right double quotation mark
    when 0x95 then 0x2022 # bullet
    when 0x96 then 0x2013 # en dash
    when 0x97 then 0x2014 # em dash
    when 0x98 then 0x02DC # small tilde
    when 0x99 then 0x2122 # trade mark sign
    when 0x9A then 0x0161 # small s with caron
    when 0x9B then 0x203A # single right angle quotation mark
    # Was 0x0152, which decoded the small ligature as the capital one.
    when 0x9C then 0x0153 # small ligature oe
    when 0x9E then 0x017E # small z with caron
    when 0x9F then 0x0178 # capital Y with diaeresis
    else num
    end
  end
  code_points.pack('U*')
end
|
102
|
+
end
|
103
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module REXML
|
2
|
+
module Encoding
|
3
|
+
# Define decode_eucjp/encode_eucjp on top of uconv when it can be loaded,
# otherwise on top of nkf (which also defines the EUCTOU8/U8TOEUC option
# strings). A LoadError from nkf itself is not rescued.
uconv_loaded =
  begin
    require 'uconv'
    true
  rescue LoadError
    false
  end

if uconv_loaded
  # EUC-JP -> UTF-8
  def decode_eucjp(str)
    Uconv.euctou8(str)
  end

  # UTF-8 -> EUC-JP
  def encode_eucjp(content)
    Uconv.u8toeuc(content)
  end
else
  require 'nkf'

  EUCTOU8 = '-Ewm0'
  U8TOEUC = '-Wem0'

  # EUC-JP -> UTF-8
  def decode_eucjp(str)
    NKF.nkf(EUCTOU8, str)
  end

  # UTF-8 -> EUC-JP
  def encode_eucjp(content)
    NKF.nkf(U8TOEUC, content)
  end
end
|
27
|
+
|
28
|
+
# Installer for "EUC-JP": points the target object's own decode/encode
# at the EUC-JP converters (on its singleton class only).
register("EUC-JP") do |target|
  class << target
    alias_method :decode, :decode_eucjp
    alias_method :encode, :encode_eucjp
  end
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require "iconv"
|
2
|
+
raise LoadError unless defined? Iconv
|
3
|
+
|
4
|
+
module REXML
|
5
|
+
module Encoding
|
6
|
+
# Convert +str+ from this object's @encoding to UTF_8 through Iconv.
def decode_iconv(str)
  source_encoding = @encoding
  Iconv.conv(UTF_8, source_encoding, str)
end
|
9
|
+
|
10
|
+
# Convert +content+ from UTF_8 to this object's @encoding through Iconv.
def encode_iconv(content)
  target_encoding = @encoding
  Iconv.conv(target_encoding, UTF_8, content)
end
|
13
|
+
|
14
|
+
# Installer for the generic "ICONV" backend.
register("ICONV") do |target|
  # Runs before the aliases, so if this conversion call raises, the aliases
  # are not installed. NOTE(review): presumably a probe that iconv knows
  # target.encoding -- confirm against the Iconv documentation.
  Iconv.conv(UTF_8, target.encoding, nil)
  class << target
    alias_method :decode, :decode_iconv
    alias_method :encode, :encode_iconv
  end
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
#
|
2
|
+
# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
|
3
|
+
#
|
4
|
+
module REXML
|
5
|
+
module Encoding
|
6
|
+
# Installer for "ISO-8859-15": points the target object's own encode/decode
# at the ISO-8859-15 converters.
register("ISO-8859-15") do |o|
  # The aliases must live on o's singleton class. A bare `alias` here would
  # alias on the lexically enclosing module instead, changing encode/decode
  # for everything that mixes the module in.
  class << o
    alias encode to_iso_8859_15
    alias decode from_iso_8859_15
  end
end
|
10
|
+
|
11
|
+
# Convert from UTF-8 to ISO-8859-15.
#
# +content+ is unpacked as UTF-8 code points. Code points that keep the same
# value in ISO-8859-15 are emitted as that byte, the eight characters added
# relative to ISO-8859-1 are mapped to their byte, and everything else is
# written as a numeric character reference (&#nnnn;). That includes the eight
# ISO-8859-1 characters whose byte positions were reassigned.
# Returns the packed byte string.
def to_iso_8859_15(content)
  array_enc = []
  content.unpack('U*').each do |num|
    case num
    # shortcut first bunch basic characters
    when 0..0xA3 then array_enc << num
    # characters removed compared to iso-8859-1: their byte now means
    # something else, so they can only be written as an entity. Entity
    # bytes are appended as Integers; a String element would make
    # pack('C*') fail.
    when 0xA4, 0xA6, 0xA8, 0xB4, 0xB8, 0xBC, 0xBD, 0xBE
      array_enc.concat "&\##{num};".unpack('C*')
    # characters added compared to iso-8859-1
    when 0x20AC then array_enc << 0xA4 # euro sign
    when 0x0160 then array_enc << 0xA6 # capital S with caron
    when 0x0161 then array_enc << 0xA8 # small s with caron
    when 0x017D then array_enc << 0xB4 # capital Z with caron
    when 0x017E then array_enc << 0xB8 # small z with caron
    when 0x0152 then array_enc << 0xBC # capital ligature OE
    when 0x0153 then array_enc << 0xBD # small ligature oe
    when 0x0178 then array_enc << 0xBE # capital Y with diaeresis
    else
      # all remaining basic characters can be used directly
      if num <= 0xFF
        array_enc << num
      else
        # Numeric entity (&#nnnn;); shared by Stefan Scholl
        array_enc.concat "&\##{num};".unpack('C*')
      end
    end
  end
  array_enc.pack('C*')
end
|
49
|
+
|
50
|
+
# Convert from ISO-8859-15 to UTF-8.
#
# Every byte of +str+ becomes one code point: the eight positions that differ
# from ISO-8859-1 are translated, all other bytes keep their value. Returns
# the code points packed as UTF-8.
def from_iso_8859_15(str)
  code_points = str.unpack('C*').map do |byte|
    case byte
    when 0xA4 then 0x20AC
    when 0xA6 then 0x0160
    when 0xA8 then 0x0161
    when 0xB4 then 0x017D
    when 0xB8 then 0x017E
    when 0xBC then 0x0152
    when 0xBD then 0x0153
    when 0xBE then 0x0178
    else byte
    end
  end
  code_points.pack('U*')
end
|
71
|
+
end
|
72
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module REXML
|
2
|
+
module Encoding
|
3
|
+
# Define decode_sjis/encode_sjis on top of uconv when it can be loaded,
# otherwise on top of nkf (which also defines the SJISTOU8/U8TOSJIS option
# strings). A LoadError from nkf itself is not rescued.
uconv_loaded =
  begin
    require 'uconv'
    true
  rescue LoadError
    false
  end

if uconv_loaded
  # Shift_JIS -> UTF-8
  def decode_sjis(content)
    Uconv.sjistou8(content)
  end

  # UTF-8 -> Shift_JIS
  def encode_sjis(str)
    Uconv.u8tosjis(str)
  end
else
  require 'nkf'

  SJISTOU8 = '-Swm0x'
  U8TOSJIS = '-Wsm0x'

  # Shift_JIS -> UTF-8
  def decode_sjis(str)
    NKF.nkf(SJISTOU8, str)
  end

  # UTF-8 -> Shift_JIS
  def encode_sjis(content)
    NKF.nkf(U8TOSJIS, content)
  end
end
|
27
|
+
|
28
|
+
# One installer, registered under both spellings of the encoding name:
# it points the target object's own decode/encode at the Shift_JIS
# converters (on its singleton class only).
install_sjis = proc do |target|
  class << target
    alias_method :decode, :decode_sjis
    alias_method :encode, :encode_sjis
  end
end
["SHIFT-JIS", "SHIFT_JIS"].each do |label|
  register(label, &install_sjis)
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'rexml/encodings/SHIFT-JIS'
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module REXML
|
2
|
+
module Encoding
|
3
|
+
# Convert from UTF-8 to 16-bit little-endian code units.
#
# Code points above 0xFFFF do not fit in one unit and are replaced by "?"
# (0x3F); no surrogate pairs are produced. Returns the packed byte string,
# low byte first.
def encode_unile(content)
  code_units = content.unpack("U*").map do |num|
    # 0x3F rather than the ?? literal, which is a String on newer Rubies
    # and cannot be packed as a number.
    num > 0xFFFF ? 0x3F : num
  end
  # 'v' packs each value as an unsigned 16-bit little-endian integer.
  code_units.pack('v*')
end
|
17
|
+
|
18
|
+
# Convert 16-bit little-endian code units to UTF-8.
#
# Each pair of bytes (low byte first) becomes one code point; surrogate
# pairs are not combined. A trailing unpaired byte is ignored instead of
# failing on a missing high byte.
def decode_unile(str)
  # 'v' reads unsigned 16-bit little-endian integers.
  str.unpack('v*').pack('U*')
end
|
26
|
+
|
27
|
+
# Installer for UNILE: points the target object's own decode/encode at the
# little-endian converters (on its singleton class only).
register(UNILE) do |target|
  class << target
    alias_method :decode, :decode_unile
    alias_method :encode, :encode_unile
  end
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module REXML
|
2
|
+
module Encoding
|
3
|
+
# Convert from UTF-8 to US-ASCII.
#
# Code points up to 0x7F are kept as single bytes; anything larger is
# written as a numeric character reference (&#nnnn;). Returns the packed
# byte string.
def encode_ascii(content)
  pieces = content.unpack('U*').map do |code_point|
    if code_point > 0x7F
      # Numeric entity (&#nnnn;), as a list of its bytes
      "&\##{code_point};".unpack('C*')
    else
      code_point
    end
  end
  pieces.flatten.pack('C*')
end
|
17
|
+
|
18
|
+
# Convert to UTF-8: every byte of +str+ becomes the code point of the same
# value, and the code points are packed as UTF-8.
def decode_ascii(str)
  byte_values = str.unpack('C*')
  byte_values.pack('U*')
end
|
22
|
+
|
23
|
+
# Installer for "US-ASCII": points the target object's own decode/encode
# at the ASCII converters (on its singleton class only).
register("US-ASCII") do |target|
  class << target
    alias_method :decode, :decode_ascii
    alias_method :encode, :encode_ascii
  end
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module REXML
|
2
|
+
module Encoding
|
3
|
+
# Convert from UTF-8 to 16-bit big-endian code units.
#
# Code points above 0xFFFF do not fit in one unit and are replaced by "?"
# (0x3F); no surrogate pairs are produced. Returns the packed byte string,
# high byte first, without a byte order mark.
def encode_utf16(content)
  code_units = content.unpack("U*").map do |num|
    # 0x3F rather than the ?? literal, which is a String on newer Rubies
    # and cannot be packed as a number.
    num > 0xFFFF ? 0x3F : num
  end
  # 'n' packs each value as an unsigned 16-bit big-endian integer.
  code_units.pack('n*')
end
|
17
|
+
|
18
|
+
# Convert 16-bit big-endian code units to UTF-8.
#
# A byte order mark (FE FF) is dropped only when it is the very first unit.
# Each remaining pair of bytes (high byte first) becomes one code point;
# surrogate pairs are not combined. A trailing unpaired byte is ignored
# instead of failing on a missing low byte.
def decode_utf16(str)
  # 'n' reads unsigned 16-bit big-endian integers.
  code_units = str.unpack('n*')
  # Checking the first unit avoids a line-anchored regexp, which could also
  # match FE FF right after a 0x0A byte in the middle of the data.
  code_units.shift if code_units.first == 0xFEFF
  code_units.pack('U*')
end
|
27
|
+
|
28
|
+
# Installer for UTF_16: points the target object's own decode/encode at the
# big-endian converters (on its singleton class only).
register(UTF_16) do |target|
  class << target
    alias_method :decode, :decode_utf16
    alias_method :encode, :encode_utf16
  end
end
|
34
|
+
end
|
35
|
+
end
|