rbs 2.1.0 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/comments.yml +34 -0
- data/CHANGELOG.md +45 -0
- data/Rakefile +52 -21
- data/core/encoding.rbs +742 -0
- data/core/file.rbs +1 -3
- data/core/kernel.rbs +5 -3
- data/docs/syntax.md +54 -11
- data/ext/rbs_extension/extconf.rb +1 -0
- data/ext/rbs_extension/lexer.h +5 -0
- data/ext/rbs_extension/lexstate.c +6 -0
- data/ext/rbs_extension/parser.c +85 -10
- data/ext/rbs_extension/ruby_objs.c +4 -2
- data/ext/rbs_extension/ruby_objs.h +2 -2
- data/goodcheck.yml +0 -11
- data/lib/rbs/annotate/rdoc_annotator.rb +2 -2
- data/lib/rbs/ast/members.rb +21 -13
- data/lib/rbs/buffer.rb +17 -11
- data/lib/rbs/cli.rb +5 -2
- data/lib/rbs/definition_builder/method_builder.rb +28 -16
- data/lib/rbs/definition_builder.rb +1 -1
- data/lib/rbs/environment.rb +8 -4
- data/lib/rbs/namespace.rb +1 -1
- data/lib/rbs/type_name.rb +1 -1
- data/lib/rbs/types.rb +1 -1
- data/lib/rbs/validator.rb +2 -0
- data/lib/rbs/version.rb +1 -1
- data/lib/rbs/writer.rb +54 -4
- data/lib/rbs.rb +0 -2
- data/schema/typeParam.json +3 -3
- data/sig/annotate/rdoc_annotater.rbs +1 -1
- data/sig/buffer.rbs +6 -2
- data/sig/members.rbs +24 -18
- data/sig/method_builder.rbs +5 -4
- data/sig/writer.rbs +79 -2
- data/stdlib/net-http/0/manifest.yaml +1 -0
- data/stdlib/net-http/0/net-http.rbs +21 -0
- data/stdlib/uri/0/generic.rbs +2 -2
- data/stdlib/uri/0/http.rbs +35 -0
- metadata +3 -3
- data/sig/char_scanner.rbs +0 -9
data/core/encoding.rbs
CHANGED
@@ -879,6 +879,587 @@ Encoding::Windows_874: Encoding
|
|
879
879
|
# Encoding conversion class.
|
880
880
|
#
|
881
881
|
class Encoding::Converter < Object
|
882
|
+
type encoding = String | Encoding
|
883
|
+
type decorator = "universal_newline"
|
884
|
+
| "crlf_newline"
|
885
|
+
| "cr_newline"
|
886
|
+
| "xml_text_escape"
|
887
|
+
| "xml_attr_content_escape"
|
888
|
+
| "xml_attr_quote"
|
889
|
+
type conversion_path = Array[[encoding, encoding] | decorator]
|
890
|
+
type convert_result = :invalid_byte_sequence
|
891
|
+
| :incomplete_input
|
892
|
+
| :undefined_conversion
|
893
|
+
| :after_output
|
894
|
+
| :destination_buffer_full
|
895
|
+
| :source_buffer_empty
|
896
|
+
| :finished
|
897
|
+
|
898
|
+
# <!--
|
899
|
+
# rdoc-file=transcode.c
|
900
|
+
# - Encoding::Converter.asciicompat_encoding(string) -> encoding or nil
|
901
|
+
# - Encoding::Converter.asciicompat_encoding(encoding) -> encoding or nil
|
902
|
+
# -->
|
903
|
+
# Returns the corresponding ASCII compatible encoding.
|
904
|
+
#
|
905
|
+
# Returns nil if the argument is an ASCII compatible encoding.
|
906
|
+
#
|
907
|
+
# "corresponding ASCII compatible encoding" is an ASCII compatible encoding
|
908
|
+
# which can represent exactly the same characters as the given ASCII
|
909
|
+
# incompatible encoding. So, no conversion undefined error occurs when
|
910
|
+
# converting between the two encodings.
|
911
|
+
#
|
912
|
+
# Encoding::Converter.asciicompat_encoding("ISO-2022-JP") #=> #<Encoding:stateless-ISO-2022-JP>
|
913
|
+
# Encoding::Converter.asciicompat_encoding("UTF-16BE") #=> #<Encoding:UTF-8>
|
914
|
+
# Encoding::Converter.asciicompat_encoding("UTF-8") #=> nil
|
915
|
+
#
|
916
|
+
def self.asciicompat_encoding: (encoding enc) -> Encoding?
|
917
|
+
|
918
|
+
# <!--
|
919
|
+
# rdoc-file=transcode.c
|
920
|
+
# - Encoding::Converter.search_convpath(source_encoding, destination_encoding) -> ary
|
921
|
+
# - Encoding::Converter.search_convpath(source_encoding, destination_encoding, opt) -> ary
|
922
|
+
# -->
|
923
|
+
# Returns a conversion path.
|
924
|
+
#
|
925
|
+
# p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP")
|
926
|
+
# #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
|
927
|
+
# # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>]]
|
928
|
+
#
|
929
|
+
# p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true)
|
930
|
+
# or
|
931
|
+
# p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", newline: :universal)
|
932
|
+
# #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
|
933
|
+
# # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
|
934
|
+
# # "universal_newline"]
|
935
|
+
#
|
936
|
+
# p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true)
|
937
|
+
# or
|
938
|
+
# p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", newline: :universal)
|
939
|
+
# #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
|
940
|
+
# # "universal_newline",
|
941
|
+
# # [#<Encoding:UTF-8>, #<Encoding:UTF-32BE>]]
|
942
|
+
#
|
943
|
+
def self.search_convpath: (
|
944
|
+
encoding source,
|
945
|
+
encoding destination,
|
946
|
+
?newline: :universal | :crlf | :cr,
|
947
|
+
?universal_newline: bool,
|
948
|
+
?crlf_newline: bool,
|
949
|
+
?cr_newline: bool,
|
950
|
+
?xml: :text | :attr
|
951
|
+
) -> conversion_path
|
952
|
+
|
953
|
+
public
|
954
|
+
|
955
|
+
# <!--
|
956
|
+
# rdoc-file=transcode.c
|
957
|
+
# - ec == other -> true or false
|
958
|
+
# -->
|
959
|
+
#
|
960
|
+
def ==: (self) -> bool
|
961
|
+
|
962
|
+
# <!--
|
963
|
+
# rdoc-file=transcode.c
|
964
|
+
# - ec.convert(source_string) -> destination_string
|
965
|
+
# -->
|
966
|
+
# Convert source_string and return destination_string.
|
967
|
+
#
|
968
|
+
# source_string is assumed as a part of source. i.e. :partial_input=>true is
|
969
|
+
# specified internally. finish method should be used last.
|
970
|
+
#
|
971
|
+
# ec = Encoding::Converter.new("utf-8", "euc-jp")
|
972
|
+
# puts ec.convert("\u3042").dump #=> "\xA4\xA2"
|
973
|
+
# puts ec.finish.dump #=> ""
|
974
|
+
#
|
975
|
+
# ec = Encoding::Converter.new("euc-jp", "utf-8")
|
976
|
+
# puts ec.convert("\xA4").dump #=> ""
|
977
|
+
# puts ec.convert("\xA2").dump #=> "\xE3\x81\x82"
|
978
|
+
# puts ec.finish.dump #=> ""
|
979
|
+
#
|
980
|
+
# ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
|
981
|
+
# puts ec.convert("\xE3").dump #=> "".force_encoding("ISO-2022-JP")
|
982
|
+
# puts ec.convert("\x81").dump #=> "".force_encoding("ISO-2022-JP")
|
983
|
+
# puts ec.convert("\x82").dump #=> "\e$B$\"".force_encoding("ISO-2022-JP")
|
984
|
+
# puts ec.finish.dump #=> "\e(B".force_encoding("ISO-2022-JP")
|
985
|
+
#
|
986
|
+
# If a conversion error occurs, Encoding::UndefinedConversionError or
|
987
|
+
# Encoding::InvalidByteSequenceError is raised. Encoding::Converter#convert
|
988
|
+
# doesn't supply methods to recover or restart from these exceptions. When you
|
989
|
+
# want to handle these conversion errors, use
|
990
|
+
# Encoding::Converter#primitive_convert.
|
991
|
+
#
|
992
|
+
def convert: (String source) -> String
|
993
|
+
|
994
|
+
# <!--
|
995
|
+
# rdoc-file=transcode.c
|
996
|
+
# - ec.convpath -> ary
|
997
|
+
# -->
|
998
|
+
# Returns the conversion path of ec.
|
999
|
+
#
|
1000
|
+
# The result is an array of conversions.
|
1001
|
+
#
|
1002
|
+
# ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP", crlf_newline: true)
|
1003
|
+
# p ec.convpath
|
1004
|
+
# #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
|
1005
|
+
# # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
|
1006
|
+
# # "crlf_newline"]
|
1007
|
+
#
|
1008
|
+
# Each element of the array is a pair of encodings or a string. A pair means an
|
1009
|
+
# encoding conversion. A string means a decorator.
|
1010
|
+
#
|
1011
|
+
# In the above example, [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>] means a
|
1012
|
+
# converter from ISO-8859-1 to UTF-8. "crlf_newline" means newline converter
|
1013
|
+
# from LF to CRLF.
|
1014
|
+
#
|
1015
|
+
def convpath: () -> conversion_path
|
1016
|
+
|
1017
|
+
# <!--
|
1018
|
+
# rdoc-file=transcode.c
|
1019
|
+
# - ec.destination_encoding -> encoding
|
1020
|
+
# -->
|
1021
|
+
# Returns the destination encoding as an Encoding object.
|
1022
|
+
#
|
1023
|
+
def destination_encoding: () -> Encoding
|
1024
|
+
|
1025
|
+
# <!--
|
1026
|
+
# rdoc-file=transcode.c
|
1027
|
+
# - ec.finish -> string
|
1028
|
+
# -->
|
1029
|
+
# Finishes the converter. It returns the last part of the converted string.
|
1030
|
+
#
|
1031
|
+
# ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
|
1032
|
+
# p ec.convert("\u3042") #=> "\e$B$\""
|
1033
|
+
# p ec.finish #=> "\e(B"
|
1034
|
+
#
|
1035
|
+
def finish: () -> String
|
1036
|
+
|
1037
|
+
# <!--
|
1038
|
+
# rdoc-file=transcode.c
|
1039
|
+
# - ec.insert_output(string) -> nil
|
1040
|
+
# -->
|
1041
|
+
# Inserts string into the encoding converter. The string will be converted to
|
1042
|
+
# the destination encoding and output on later conversions.
|
1043
|
+
#
|
1044
|
+
# If the destination encoding is stateful, string is converted according to the
|
1045
|
+
# state and the state is updated.
|
1046
|
+
#
|
1047
|
+
# This method should be used only when a conversion error occurs.
|
1048
|
+
#
|
1049
|
+
# ec = Encoding::Converter.new("utf-8", "iso-8859-1")
|
1050
|
+
# src = "HIRAGANA LETTER A is \u{3042}."
|
1051
|
+
# dst = ""
|
1052
|
+
# p ec.primitive_convert(src, dst) #=> :undefined_conversion
|
1053
|
+
# puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is ", "."]
|
1054
|
+
# ec.insert_output("<err>")
|
1055
|
+
# p ec.primitive_convert(src, dst) #=> :finished
|
1056
|
+
# puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is <err>.", ""]
|
1057
|
+
#
|
1058
|
+
# ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
|
1059
|
+
# src = "\u{306F 3041 3068 2661 3002}" # U+2661 is not representable in iso-2022-jp
|
1060
|
+
# dst = ""
|
1061
|
+
# p ec.primitive_convert(src, dst) #=> :undefined_conversion
|
1062
|
+
# puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H".force_encoding("ISO-2022-JP"), "\xE3\x80\x82"]
|
1063
|
+
# ec.insert_output "?" # state change required to output "?".
|
1064
|
+
# p ec.primitive_convert(src, dst) #=> :finished
|
1065
|
+
# puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H\e(B?\e$B!#\e(B".force_encoding("ISO-2022-JP"), ""]
|
1066
|
+
#
|
1067
|
+
def insert_output: (String) -> nil
|
1068
|
+
|
1069
|
+
# <!--
|
1070
|
+
# rdoc-file=transcode.c
|
1071
|
+
# - ec.inspect -> string
|
1072
|
+
# -->
|
1073
|
+
# Returns a printable version of *ec*
|
1074
|
+
#
|
1075
|
+
# ec = Encoding::Converter.new("iso-8859-1", "utf-8")
|
1076
|
+
# puts ec.inspect #=> #<Encoding::Converter: ISO-8859-1 to UTF-8>
|
1077
|
+
#
|
1078
|
+
def inspect: () -> String
|
1079
|
+
|
1080
|
+
# <!--
|
1081
|
+
# rdoc-file=transcode.c
|
1082
|
+
# - ec.last_error -> exception or nil
|
1083
|
+
# -->
|
1084
|
+
# Returns an exception object for the last conversion. Returns nil if the last
|
1085
|
+
# conversion did not produce an error.
|
1086
|
+
#
|
1087
|
+
# "error" means that Encoding::InvalidByteSequenceError and
|
1088
|
+
# Encoding::UndefinedConversionError for Encoding::Converter#convert and
|
1089
|
+
# :invalid_byte_sequence, :incomplete_input and :undefined_conversion for
|
1090
|
+
# Encoding::Converter#primitive_convert.
|
1091
|
+
#
|
1092
|
+
# ec = Encoding::Converter.new("utf-8", "iso-8859-1")
|
1093
|
+
# p ec.primitive_convert(src="\xf1abcd", dst="") #=> :invalid_byte_sequence
|
1094
|
+
# p ec.last_error #=> #<Encoding::InvalidByteSequenceError: "\xF1" followed by "a" on UTF-8>
|
1095
|
+
# p ec.primitive_convert(src, dst, nil, 1) #=> :destination_buffer_full
|
1096
|
+
# p ec.last_error #=> nil
|
1097
|
+
#
|
1098
|
+
def last_error: () -> Encoding::InvalidByteSequenceError?
|
1099
|
+
| () -> Encoding::UndefinedConversionError?
|
1100
|
+
|
1101
|
+
# <!--
|
1102
|
+
# rdoc-file=transcode.c
|
1103
|
+
# - ec.primitive_convert(source_buffer, destination_buffer) -> symbol
|
1104
|
+
# - ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset) -> symbol
|
1105
|
+
# - ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol
|
1106
|
+
# - ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, opt) -> symbol
|
1107
|
+
# -->
|
1108
|
+
# possible opt elements:
|
1109
|
+
# hash form:
|
1110
|
+
# :partial_input => true # source buffer may be part of larger source
|
1111
|
+
# :after_output => true # stop conversion after output before input
|
1112
|
+
# integer form:
|
1113
|
+
# Encoding::Converter::PARTIAL_INPUT
|
1114
|
+
# Encoding::Converter::AFTER_OUTPUT
|
1115
|
+
#
|
1116
|
+
# possible results:
|
1117
|
+
# :invalid_byte_sequence
|
1118
|
+
# :incomplete_input
|
1119
|
+
# :undefined_conversion
|
1120
|
+
# :after_output
|
1121
|
+
# :destination_buffer_full
|
1122
|
+
# :source_buffer_empty
|
1123
|
+
# :finished
|
1124
|
+
#
|
1125
|
+
# primitive_convert converts source_buffer into destination_buffer.
|
1126
|
+
#
|
1127
|
+
# source_buffer should be a string or nil. nil means an empty string.
|
1128
|
+
#
|
1129
|
+
# destination_buffer should be a string.
|
1130
|
+
#
|
1131
|
+
# destination_byteoffset should be an integer or nil. nil means the end of
|
1132
|
+
# destination_buffer. If it is omitted, nil is assumed.
|
1133
|
+
#
|
1134
|
+
# destination_bytesize should be an integer or nil. nil means unlimited. If it
|
1135
|
+
# is omitted, nil is assumed.
|
1136
|
+
#
|
1137
|
+
# opt should be nil, a hash or an integer. nil means no flags. If it is omitted,
|
1138
|
+
# nil is assumed.
|
1139
|
+
#
|
1140
|
+
# primitive_convert converts the content of source_buffer from beginning and
|
1141
|
+
# store the result into destination_buffer.
|
1142
|
+
#
|
1143
|
+
# destination_byteoffset and destination_bytesize specify the region in which the
|
1144
|
+
# converted result is stored. destination_byteoffset specifies the start
|
1145
|
+
# position in destination_buffer in bytes. If destination_byteoffset is nil,
|
1146
|
+
# destination_buffer.bytesize is used for appending the result.
|
1147
|
+
# destination_bytesize specifies maximum number of bytes. If
|
1148
|
+
# destination_bytesize is nil, destination size is unlimited. After conversion,
|
1149
|
+
# destination_buffer is resized to destination_byteoffset + actually produced
|
1150
|
+
# number of bytes. Also destination_buffer's encoding is set to
|
1151
|
+
# destination_encoding.
|
1152
|
+
#
|
1153
|
+
# primitive_convert drops the converted part of source_buffer. The dropped part
|
1154
|
+
# is converted in destination_buffer or buffered in Encoding::Converter object.
|
1155
|
+
#
|
1156
|
+
# primitive_convert stops conversion when one of the following conditions is met.
|
1157
|
+
# * invalid byte sequence found in source buffer (:invalid_byte_sequence)
|
1158
|
+
# `primitive_errinfo` and `last_error` methods return the detail of the
|
1159
|
+
# error.
|
1160
|
+
# * unexpected end of source buffer (:incomplete_input) this occurs only when
|
1161
|
+
# :partial_input is not specified. `primitive_errinfo` and `last_error`
|
1162
|
+
# methods return the detail of the error.
|
1163
|
+
# * character not representable in output encoding (:undefined_conversion)
|
1164
|
+
# `primitive_errinfo` and `last_error` methods return the detail of the
|
1165
|
+
# error.
|
1166
|
+
# * after some output is generated, before input is done (:after_output) this
|
1167
|
+
# occurs only when :after_output is specified.
|
1168
|
+
# * destination buffer is full (:destination_buffer_full) this occurs only when
|
1169
|
+
# destination_bytesize is non-nil.
|
1170
|
+
# * source buffer is empty (:source_buffer_empty) this occurs only when
|
1171
|
+
# :partial_input is specified.
|
1172
|
+
# * conversion is finished (:finished)
|
1173
|
+
#
|
1174
|
+
#
|
1175
|
+
# example:
|
1176
|
+
# ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
|
1177
|
+
# ret = ec.primitive_convert(src="pi", dst="", nil, 100)
|
1178
|
+
# p [ret, src, dst] #=> [:finished, "", "\x00p\x00i"]
|
1179
|
+
#
|
1180
|
+
# ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
|
1181
|
+
# ret = ec.primitive_convert(src="pi", dst="", nil, 1)
|
1182
|
+
# p [ret, src, dst] #=> [:destination_buffer_full, "i", "\x00"]
|
1183
|
+
# ret = ec.primitive_convert(src, dst="", nil, 1)
|
1184
|
+
# p [ret, src, dst] #=> [:destination_buffer_full, "", "p"]
|
1185
|
+
# ret = ec.primitive_convert(src, dst="", nil, 1)
|
1186
|
+
# p [ret, src, dst] #=> [:destination_buffer_full, "", "\x00"]
|
1187
|
+
# ret = ec.primitive_convert(src, dst="", nil, 1)
|
1188
|
+
# p [ret, src, dst] #=> [:finished, "", "i"]
|
1189
|
+
#
|
1190
|
+
def primitive_convert: (
|
1191
|
+
String? source,
|
1192
|
+
String destination,
|
1193
|
+
?Integer? destination_byteoffset,
|
1194
|
+
?Integer? destination_bytesize,
|
1195
|
+
?partial_input: bool,
|
1196
|
+
?after_output: bool
|
1197
|
+
) -> convert_result
|
1198
|
+
| (
|
1199
|
+
String? source,
|
1200
|
+
String destination,
|
1201
|
+
?Integer? destination_byteoffset,
|
1202
|
+
?Integer? destination_bytesize,
|
1203
|
+
?Integer opt
|
1204
|
+
) -> convert_result
|
1205
|
+
|
1206
|
+
# <!--
|
1207
|
+
# rdoc-file=transcode.c
|
1208
|
+
# - ec.primitive_errinfo -> array
|
1209
|
+
# -->
|
1210
|
+
# primitive_errinfo returns important information regarding the last error as a
|
1211
|
+
# 5-element array:
|
1212
|
+
#
|
1213
|
+
# [result, enc1, enc2, error_bytes, readagain_bytes]
|
1214
|
+
#
|
1215
|
+
# result is the last result of primitive_convert.
|
1216
|
+
#
|
1217
|
+
# Other elements are only meaningful when result is :invalid_byte_sequence,
|
1218
|
+
# :incomplete_input or :undefined_conversion.
|
1219
|
+
#
|
1220
|
+
# enc1 and enc2 indicate a conversion step as a pair of strings. For example, a
|
1221
|
+
# converter from EUC-JP to ISO-8859-1 converts a string as follows: EUC-JP ->
|
1222
|
+
# UTF-8 -> ISO-8859-1. So [enc1, enc2] is either ["EUC-JP", "UTF-8"] or
|
1223
|
+
# ["UTF-8", "ISO-8859-1"].
|
1224
|
+
#
|
1225
|
+
# error_bytes and readagain_bytes indicate the byte sequences which caused the
|
1226
|
+
# error. error_bytes is discarded portion. readagain_bytes is buffered portion
|
1227
|
+
# which is read again on next conversion.
|
1228
|
+
#
|
1229
|
+
# Example:
|
1230
|
+
#
|
1231
|
+
# # \xff is invalid as EUC-JP.
|
1232
|
+
# ec = Encoding::Converter.new("EUC-JP", "Shift_JIS")
|
1233
|
+
# ec.primitive_convert(src="\xff", dst="", nil, 10)
|
1234
|
+
# p ec.primitive_errinfo
|
1235
|
+
# #=> [:invalid_byte_sequence, "EUC-JP", "Shift_JIS", "\xFF", ""]
|
1236
|
+
#
|
1237
|
+
# # HIRAGANA LETTER A (\xa4\xa2 in EUC-JP) is not representable in ISO-8859-1.
|
1238
|
+
# # Since this error occurs in the UTF-8 to ISO-8859-1 conversion,
|
1239
|
+
# # error_bytes is HIRAGANA LETTER A in UTF-8 (\xE3\x81\x82).
|
1240
|
+
# ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
|
1241
|
+
# ec.primitive_convert(src="\xa4\xa2", dst="", nil, 10)
|
1242
|
+
# p ec.primitive_errinfo
|
1243
|
+
# #=> [:undefined_conversion, "UTF-8", "ISO-8859-1", "\xE3\x81\x82", ""]
|
1244
|
+
#
|
1245
|
+
# # partial character is invalid
|
1246
|
+
# ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
|
1247
|
+
# ec.primitive_convert(src="\xa4", dst="", nil, 10)
|
1248
|
+
# p ec.primitive_errinfo
|
1249
|
+
# #=> [:incomplete_input, "EUC-JP", "UTF-8", "\xA4", ""]
|
1250
|
+
#
|
1251
|
+
# # Encoding::Converter::PARTIAL_INPUT prevents invalid errors by
|
1252
|
+
# # partial characters.
|
1253
|
+
# ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
|
1254
|
+
# ec.primitive_convert(src="\xa4", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
|
1255
|
+
# p ec.primitive_errinfo
|
1256
|
+
# #=> [:source_buffer_empty, nil, nil, nil, nil]
|
1257
|
+
#
|
1258
|
+
# # \xd8\x00\x00@ is invalid as UTF-16BE because
|
1259
|
+
# # no low surrogate after high surrogate (\xd8\x00).
|
1260
|
+
# # It is detected by 3rd byte (\00) which is part of next character.
|
1261
|
+
# # So the high surrogate (\xd8\x00) is discarded and
|
1262
|
+
# # the 3rd byte is read again later.
|
1263
|
+
# # Since the byte is buffered in ec, it is dropped from src.
|
1264
|
+
# ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
|
1265
|
+
# ec.primitive_convert(src="\xd8\x00\x00@", dst="", nil, 10)
|
1266
|
+
# p ec.primitive_errinfo
|
1267
|
+
# #=> [:invalid_byte_sequence, "UTF-16BE", "UTF-8", "\xD8\x00", "\x00"]
|
1268
|
+
# p src
|
1269
|
+
# #=> "@"
|
1270
|
+
#
|
1271
|
+
# # Similar to UTF-16BE, \x00\xd8@\x00 is invalid as UTF-16LE.
|
1272
|
+
# # The problem is detected by 4th byte.
|
1273
|
+
# ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
|
1274
|
+
# ec.primitive_convert(src="\x00\xd8@\x00", dst="", nil, 10)
|
1275
|
+
# p ec.primitive_errinfo
|
1276
|
+
# #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "@\x00"]
|
1277
|
+
# p src
|
1278
|
+
# #=> ""
|
1279
|
+
#
|
1280
|
+
def primitive_errinfo: () -> [convert_result, String?, String?, String?, String?]
|
1281
|
+
|
1282
|
+
# <!--
|
1283
|
+
# rdoc-file=transcode.c
|
1284
|
+
# - ec.putback -> string
|
1285
|
+
# - ec.putback(max_numbytes) -> string
|
1286
|
+
# -->
|
1287
|
+
# Put back the bytes which will be converted.
|
1288
|
+
#
|
1289
|
+
# The bytes are caused by invalid_byte_sequence error. When
|
1290
|
+
# invalid_byte_sequence error, some bytes are discarded and some bytes are
|
1291
|
+
# buffered to be converted later. The latter bytes can be put back. It can be
|
1292
|
+
# observed by Encoding::InvalidByteSequenceError#readagain_bytes and
|
1293
|
+
# Encoding::Converter#primitive_errinfo.
|
1294
|
+
#
|
1295
|
+
# ec = Encoding::Converter.new("utf-16le", "iso-8859-1")
|
1296
|
+
# src = "\x00\xd8\x61\x00"
|
1297
|
+
# dst = ""
|
1298
|
+
# p ec.primitive_convert(src, dst) #=> :invalid_byte_sequence
|
1299
|
+
# p ec.primitive_errinfo #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "a\x00"]
|
1300
|
+
# p ec.putback #=> "a\x00"
|
1301
|
+
# p ec.putback #=> "" # no more bytes to put back
|
1302
|
+
#
|
1303
|
+
def putback: (?Integer max_numbytes) -> String
|
1304
|
+
|
1305
|
+
# <!--
|
1306
|
+
# rdoc-file=transcode.c
|
1307
|
+
# - ec.replacement -> string
|
1308
|
+
# -->
|
1309
|
+
# Returns the replacement string.
|
1310
|
+
#
|
1311
|
+
# ec = Encoding::Converter.new("euc-jp", "us-ascii")
|
1312
|
+
# p ec.replacement #=> "?"
|
1313
|
+
#
|
1314
|
+
# ec = Encoding::Converter.new("euc-jp", "utf-8")
|
1315
|
+
# p ec.replacement #=> "\uFFFD"
|
1316
|
+
#
|
1317
|
+
def replacement: () -> String
|
1318
|
+
|
1319
|
+
# <!--
|
1320
|
+
# rdoc-file=transcode.c
|
1321
|
+
# - ec.replacement = string
|
1322
|
+
# -->
|
1323
|
+
# Sets the replacement string.
|
1324
|
+
#
|
1325
|
+
# ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace)
|
1326
|
+
# ec.replacement = "<undef>"
|
1327
|
+
# p ec.convert("a \u3042 b") #=> "a <undef> b"
|
1328
|
+
#
|
1329
|
+
def replacement=: (String str) -> String
|
1330
|
+
|
1331
|
+
# <!--
|
1332
|
+
# rdoc-file=transcode.c
|
1333
|
+
# - ec.source_encoding -> encoding
|
1334
|
+
# -->
|
1335
|
+
# Returns the source encoding as an Encoding object.
|
1336
|
+
#
|
1337
|
+
def source_encoding: () -> Encoding
|
1338
|
+
|
1339
|
+
private
|
1340
|
+
|
1341
|
+
# <!--
|
1342
|
+
# rdoc-file=transcode.c
|
1343
|
+
# - Encoding::Converter.new(source_encoding, destination_encoding)
|
1344
|
+
# - Encoding::Converter.new(source_encoding, destination_encoding, opt)
|
1345
|
+
# - Encoding::Converter.new(convpath)
|
1346
|
+
# -->
|
1347
|
+
# possible options elements:
|
1348
|
+
# hash form:
|
1349
|
+
# :invalid => nil # raise error on invalid byte sequence (default)
|
1350
|
+
# :invalid => :replace # replace invalid byte sequence
|
1351
|
+
# :undef => nil # raise error on undefined conversion (default)
|
1352
|
+
# :undef => :replace # replace undefined conversion
|
1353
|
+
# :replace => string # replacement string ("?" or "\uFFFD" if not specified)
|
1354
|
+
# :newline => :universal # decorator for converting CRLF and CR to LF
|
1355
|
+
# :newline => :crlf # decorator for converting LF to CRLF
|
1356
|
+
# :newline => :cr # decorator for converting LF to CR
|
1357
|
+
# :universal_newline => true # decorator for converting CRLF and CR to LF
|
1358
|
+
# :crlf_newline => true # decorator for converting LF to CRLF
|
1359
|
+
# :cr_newline => true # decorator for converting LF to CR
|
1360
|
+
# :xml => :text # escape as XML CharData.
|
1361
|
+
# :xml => :attr # escape as XML AttValue
|
1362
|
+
# integer form:
|
1363
|
+
# Encoding::Converter::INVALID_REPLACE
|
1364
|
+
# Encoding::Converter::UNDEF_REPLACE
|
1365
|
+
# Encoding::Converter::UNDEF_HEX_CHARREF
|
1366
|
+
# Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR
|
1367
|
+
# Encoding::Converter::CRLF_NEWLINE_DECORATOR
|
1368
|
+
# Encoding::Converter::CR_NEWLINE_DECORATOR
|
1369
|
+
# Encoding::Converter::XML_TEXT_DECORATOR
|
1370
|
+
# Encoding::Converter::XML_ATTR_CONTENT_DECORATOR
|
1371
|
+
# Encoding::Converter::XML_ATTR_QUOTE_DECORATOR
|
1372
|
+
#
|
1373
|
+
# Encoding::Converter.new creates an instance of Encoding::Converter.
|
1374
|
+
#
|
1375
|
+
# Source_encoding and destination_encoding should be a string or Encoding
|
1376
|
+
# object.
|
1377
|
+
#
|
1378
|
+
# opt should be nil, a hash or an integer.
|
1379
|
+
#
|
1380
|
+
# convpath should be an array. convpath may contain
|
1381
|
+
# * two-element arrays which contain encodings or encoding names, or
|
1382
|
+
# * strings representing decorator names.
|
1383
|
+
#
|
1384
|
+
#
|
1385
|
+
# Encoding::Converter.new optionally takes an option. The option should be a
|
1386
|
+
# hash or an integer. The option hash can contain :invalid => nil, etc. The
|
1387
|
+
# option integer should be logical-or of constants such as
|
1388
|
+
# Encoding::Converter::INVALID_REPLACE, etc.
|
1389
|
+
#
|
1390
|
+
# :invalid => nil
|
1391
|
+
# : Raise error on invalid byte sequence. This is a default behavior.
|
1392
|
+
# :invalid => :replace
|
1393
|
+
# : Replace invalid byte sequence by replacement string.
|
1394
|
+
# :undef => nil
|
1395
|
+
# : Raise an error if a character in source_encoding is not defined in
|
1396
|
+
# destination_encoding. This is a default behavior.
|
1397
|
+
# :undef => :replace
|
1398
|
+
# : Replace undefined character in destination_encoding with replacement
|
1399
|
+
# string.
|
1400
|
+
# :replace => string
|
1401
|
+
# : Specify the replacement string. If not specified, "\uFFFD" is used for
|
1402
|
+
# Unicode encodings and "?" for others.
|
1403
|
+
# :universal_newline => true
|
1404
|
+
# : Convert CRLF and CR to LF.
|
1405
|
+
# :crlf_newline => true
|
1406
|
+
# : Convert LF to CRLF.
|
1407
|
+
# :cr_newline => true
|
1408
|
+
# : Convert LF to CR.
|
1409
|
+
# :xml => :text
|
1410
|
+
# : Escape as XML CharData. This form can be used as an HTML 4.0 #PCDATA.
|
1411
|
+
# * '&' -> '&amp;'
|
1412
|
+
# * '<' -> '&lt;'
|
1413
|
+
# * '>' -> '&gt;'
|
1414
|
+
# * undefined characters in destination_encoding -> hexadecimal CharRef
|
1415
|
+
# such as &#xHH;
|
1416
|
+
#
|
1417
|
+
# :xml => :attr
|
1418
|
+
# : Escape as XML AttValue. The converted result is quoted as "...". This form
|
1419
|
+
# can be used as an HTML 4.0 attribute value.
|
1420
|
+
# * '&' -> '&amp;'
|
1421
|
+
# * '<' -> '&lt;'
|
1422
|
+
# * '>' -> '&gt;'
|
1423
|
+
# * '"' -> '&quot;'
|
1424
|
+
# * undefined characters in destination_encoding -> hexadecimal CharRef
|
1425
|
+
# such as &#xHH;
|
1426
|
+
#
|
1427
|
+
#
|
1428
|
+
#
|
1429
|
+
# Examples:
|
1430
|
+
# # UTF-16BE to UTF-8
|
1431
|
+
# ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
|
1432
|
+
#
|
1433
|
+
# # Usually, decorators such as newline conversion are inserted last.
|
1434
|
+
# ec = Encoding::Converter.new("UTF-16BE", "UTF-8", :universal_newline => true)
|
1435
|
+
# p ec.convpath #=> [[#<Encoding:UTF-16BE>, #<Encoding:UTF-8>],
|
1436
|
+
# # "universal_newline"]
|
1437
|
+
#
|
1438
|
+
# # But, if the last encoding is ASCII incompatible,
|
1439
|
+
# # decorators are inserted before the last conversion.
|
1440
|
+
# ec = Encoding::Converter.new("UTF-8", "UTF-16BE", :crlf_newline => true)
|
1441
|
+
# p ec.convpath #=> ["crlf_newline",
|
1442
|
+
# # [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
|
1443
|
+
#
|
1444
|
+
# # Conversion path can be specified directly.
|
1445
|
+
# ec = Encoding::Converter.new(["universal_newline", ["EUC-JP", "UTF-8"], ["UTF-8", "UTF-16BE"]])
|
1446
|
+
# p ec.convpath #=> ["universal_newline",
|
1447
|
+
# # [#<Encoding:EUC-JP>, #<Encoding:UTF-8>],
|
1448
|
+
# # [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
|
1449
|
+
#
|
1450
|
+
def initialize: (encoding source, encoding destination) -> void
|
1451
|
+
| (encoding source, encoding destination,
|
1452
|
+
?invalid: :replace | nil,
|
1453
|
+
?undef: :replace | nil,
|
1454
|
+
?replace: String,
|
1455
|
+
?newline: :universal | :crlf | :cr,
|
1456
|
+
?universal_newline: bool,
|
1457
|
+
?crlf_newline: bool,
|
1458
|
+
?cr_newline: bool,
|
1459
|
+
?xml: :text | :attr
|
1460
|
+
) -> void
|
1461
|
+
| (encoding source, encoding destination, Integer opts) -> void
|
1462
|
+
| (conversion_path convpath) -> void
|
882
1463
|
end
|
883
1464
|
|
884
1465
|
# <!-- rdoc-file=transcode.c -->
|
@@ -995,10 +1576,171 @@ end
|
|
995
1576
|
# contains a byte invalid for either the source or target encoding.
|
996
1577
|
#
|
997
1578
|
class Encoding::InvalidByteSequenceError < EncodingError
|
1579
|
+
public
|
1580
|
+
|
1581
|
+
# <!--
|
1582
|
+
# rdoc-file=transcode.c
|
1583
|
+
# - ecerr.destination_encoding -> encoding
|
1584
|
+
# -->
|
1585
|
+
# Returns the destination encoding as an encoding object.
|
1586
|
+
#
|
1587
|
+
def destination_encoding: () -> Encoding
|
1588
|
+
|
1589
|
+
# <!--
|
1590
|
+
# rdoc-file=transcode.c
|
1591
|
+
# - ecerr.destination_encoding_name -> string
|
1592
|
+
# -->
|
1593
|
+
# Returns the destination encoding name as a string.
|
1594
|
+
#
|
1595
|
+
def destination_encoding_name: () -> String
|
1596
|
+
|
1597
|
+
# <!--
|
1598
|
+
# rdoc-file=transcode.c
|
1599
|
+
# - ecerr.error_bytes -> string
|
1600
|
+
# -->
|
1601
|
+
# Returns the discarded bytes when Encoding::InvalidByteSequenceError occurs.
|
1602
|
+
#
|
1603
|
+
# ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
|
1604
|
+
# begin
|
1605
|
+
# ec.convert("abc\xA1\xFFdef")
|
1606
|
+
# rescue Encoding::InvalidByteSequenceError
|
1607
|
+
# p $! #=> #<Encoding::InvalidByteSequenceError: "\xA1" followed by "\xFF" on EUC-JP>
|
1608
|
+
# puts $!.error_bytes.dump #=> "\xA1"
|
1609
|
+
# puts $!.readagain_bytes.dump #=> "\xFF"
|
1610
|
+
# end
|
1611
|
+
#
|
1612
|
+
def error_bytes: () -> String
|
1613
|
+
|
1614
|
+
# <!--
|
1615
|
+
# rdoc-file=transcode.c
|
1616
|
+
# - ecerr.incomplete_input? -> true or false
|
1617
|
+
# -->
|
1618
|
+
# Returns true if the invalid byte sequence error is caused by premature end of
|
1619
|
+
# string.
|
1620
|
+
#
|
1621
|
+
# ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
|
1622
|
+
#
|
1623
|
+
# begin
|
1624
|
+
# ec.convert("abc\xA1z")
|
1625
|
+
# rescue Encoding::InvalidByteSequenceError
|
1626
|
+
# p $! #=> #<Encoding::InvalidByteSequenceError: "\xA1" followed by "z" on EUC-JP>
|
1627
|
+
# p $!.incomplete_input? #=> false
|
1628
|
+
# end
|
1629
|
+
#
|
1630
|
+
# begin
|
1631
|
+
# ec.convert("abc\xA1")
|
1632
|
+
# ec.finish
|
1633
|
+
# rescue Encoding::InvalidByteSequenceError
|
1634
|
+
# p $! #=> #<Encoding::InvalidByteSequenceError: incomplete "\xA1" on EUC-JP>
|
1635
|
+
# p $!.incomplete_input? #=> true
|
1636
|
+
# end
|
1637
|
+
#
|
1638
|
+
def incomplete_input?: () -> bool
|
1639
|
+
|
1640
|
+
# <!--
|
1641
|
+
# rdoc-file=transcode.c
|
1642
|
+
# - ecerr.readagain_bytes -> string
|
1643
|
+
# -->
|
1644
|
+
# Returns the bytes to be read again when Encoding::InvalidByteSequenceError
|
1645
|
+
# occurs.
|
1646
|
+
#
|
1647
|
+
def readagain_bytes: () -> String
|
1648
|
+
|
1649
|
+
# <!--
|
1650
|
+
# rdoc-file=transcode.c
|
1651
|
+
# - ecerr.source_encoding -> encoding
|
1652
|
+
# -->
|
1653
|
+
# Returns the source encoding as an encoding object.
|
1654
|
+
#
|
1655
|
+
# Note that the result may not be equal to the source encoding of the encoding
|
1656
|
+
# converter if the conversion has multiple steps.
|
1657
|
+
#
|
1658
|
+
# ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP") # ISO-8859-1 -> UTF-8 -> EUC-JP
|
1659
|
+
# begin
|
1660
|
+
# ec.convert("\xa0") # NO-BREAK SPACE, which is available in UTF-8 but not in EUC-JP.
|
1661
|
+
# rescue Encoding::UndefinedConversionError
|
1662
|
+
# p $!.source_encoding #=> #<Encoding:UTF-8>
|
1663
|
+
# p $!.destination_encoding #=> #<Encoding:EUC-JP>
|
1664
|
+
# p $!.source_encoding_name #=> "UTF-8"
|
1665
|
+
# p $!.destination_encoding_name #=> "EUC-JP"
|
1666
|
+
# end
|
1667
|
+
#
|
1668
|
+
def source_encoding: () -> Encoding
|
1669
|
+
|
1670
|
+
# <!--
|
1671
|
+
# rdoc-file=transcode.c
|
1672
|
+
# - ecerr.source_encoding_name -> string
|
1673
|
+
# -->
|
1674
|
+
# Returns the source encoding name as a string.
|
1675
|
+
#
|
1676
|
+
def source_encoding_name: () -> String
|
998
1677
|
end
|
999
1678
|
|
1000
1679
|
# <!-- rdoc-file=transcode.c -->
|
1001
1680
|
# Raised by Encoding and String methods when a transcoding operation fails.
|
1002
1681
|
#
|
1003
1682
|
class Encoding::UndefinedConversionError < EncodingError
|
1683
|
+
public
|
1684
|
+
|
1685
|
+
# <!--
|
1686
|
+
# rdoc-file=transcode.c
|
1687
|
+
# - ecerr.destination_encoding -> encoding
|
1688
|
+
# -->
|
1689
|
+
# Returns the destination encoding as an encoding object.
|
1690
|
+
#
|
1691
|
+
def destination_encoding: () -> Encoding
|
1692
|
+
|
1693
|
+
# <!--
|
1694
|
+
# rdoc-file=transcode.c
|
1695
|
+
# - ecerr.destination_encoding_name -> string
|
1696
|
+
# -->
|
1697
|
+
# Returns the destination encoding name as a string.
|
1698
|
+
#
|
1699
|
+
def destination_encoding_name: () -> String
|
1700
|
+
|
1701
|
+
# <!--
|
1702
|
+
# rdoc-file=transcode.c
|
1703
|
+
# - ecerr.error_char -> string
|
1704
|
+
# -->
|
1705
|
+
# Returns the one-character string which causes
|
1706
|
+
# Encoding::UndefinedConversionError.
|
1707
|
+
#
|
1708
|
+
# ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP")
|
1709
|
+
# begin
|
1710
|
+
# ec.convert("\xa0")
|
1711
|
+
# rescue Encoding::UndefinedConversionError
|
1712
|
+
# puts $!.error_char.dump #=> "\xC2\xA0"
|
1713
|
+
# p $!.error_char.encoding #=> #<Encoding:UTF-8>
|
1714
|
+
# end
|
1715
|
+
#
|
1716
|
+
def error_char: () -> String
|
1717
|
+
|
1718
|
+
# <!--
|
1719
|
+
# rdoc-file=transcode.c
|
1720
|
+
# - ecerr.source_encoding -> encoding
|
1721
|
+
# -->
|
1722
|
+
# Returns the source encoding as an encoding object.
|
1723
|
+
#
|
1724
|
+
# Note that the result may not be equal to the source encoding of the encoding
|
1725
|
+
# converter if the conversion has multiple steps.
|
1726
|
+
#
|
1727
|
+
# ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP") # ISO-8859-1 -> UTF-8 -> EUC-JP
|
1728
|
+
# begin
|
1729
|
+
# ec.convert("\xa0") # NO-BREAK SPACE, which is available in UTF-8 but not in EUC-JP.
|
1730
|
+
# rescue Encoding::UndefinedConversionError
|
1731
|
+
# p $!.source_encoding #=> #<Encoding:UTF-8>
|
1732
|
+
# p $!.destination_encoding #=> #<Encoding:EUC-JP>
|
1733
|
+
# p $!.source_encoding_name #=> "UTF-8"
|
1734
|
+
# p $!.destination_encoding_name #=> "EUC-JP"
|
1735
|
+
# end
|
1736
|
+
#
|
1737
|
+
def source_encoding: () -> Encoding
|
1738
|
+
|
1739
|
+
# <!--
|
1740
|
+
# rdoc-file=transcode.c
|
1741
|
+
# - ecerr.source_encoding_name -> string
|
1742
|
+
# -->
|
1743
|
+
# Returns the source encoding name as a string.
|
1744
|
+
#
|
1745
|
+
def source_encoding_name: () -> String
|
1004
1746
|
end
|