rbs 2.1.0 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/core/encoding.rbs CHANGED
@@ -879,6 +879,587 @@ Encoding::Windows_874: Encoding
879
879
  # Encoding conversion class.
880
880
  #
881
881
  class Encoding::Converter < Object
882
+ type encoding = String | Encoding
883
+ type decorator = "universal_newline"
884
+ | "crlf_newline"
885
+ | "cr_newline"
886
+ | "xml_text_escape"
887
+ | "xml_attr_content_escape"
888
+ | "xml_attr_quote"
889
+ type conversion_path = Array[[encoding, encoding] | decorator]
890
+ type convert_result = :invalid_byte_sequence
891
+ | :incomplete_input
892
+ | :undefined_conversion
893
+ | :after_output
894
+ | :destination_buffer_full
895
+ | :source_buffer_empty
896
+ | :finished
897
+
898
+ # <!--
899
+ # rdoc-file=transcode.c
900
+ # - Encoding::Converter.asciicompat_encoding(string) -> encoding or nil
901
+ # - Encoding::Converter.asciicompat_encoding(encoding) -> encoding or nil
902
+ # -->
903
+ # Returns the corresponding ASCII compatible encoding.
904
+ #
905
+ # Returns nil if the argument is an ASCII compatible encoding.
906
+ #
907
+ # "corresponding ASCII compatible encoding" is an ASCII compatible encoding
908
+ # which can represent exactly the same characters as the given ASCII
909
+ # incompatible encoding. So, no conversion undefined error occurs when
910
+ # converting between the two encodings.
911
+ #
912
+ # Encoding::Converter.asciicompat_encoding("ISO-2022-JP") #=> #<Encoding:stateless-ISO-2022-JP>
913
+ # Encoding::Converter.asciicompat_encoding("UTF-16BE") #=> #<Encoding:UTF-8>
914
+ # Encoding::Converter.asciicompat_encoding("UTF-8") #=> nil
915
+ #
916
+ def self.asciicompat_encoding: (encoding enc) -> Encoding?
917
+
918
+ # <!--
919
+ # rdoc-file=transcode.c
920
+ # - Encoding::Converter.search_convpath(source_encoding, destination_encoding) -> ary
921
+ # - Encoding::Converter.search_convpath(source_encoding, destination_encoding, opt) -> ary
922
+ # -->
923
+ # Returns a conversion path.
924
+ #
925
+ # p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP")
926
+ # #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
927
+ # # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>]]
928
+ #
929
+ # p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true)
930
+ # or
931
+ # p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", newline: :universal)
932
+ # #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
933
+ # # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
934
+ # # "universal_newline"]
935
+ #
936
+ # p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true)
937
+ # or
938
+ # p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", newline: :universal)
939
+ # #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
940
+ # # "universal_newline",
941
+ # # [#<Encoding:UTF-8>, #<Encoding:UTF-32BE>]]
942
+ #
943
+ def self.search_convpath: (
944
+ encoding source,
945
+ encoding destination,
946
+ ?newline: :universal | :crlf | :cr,
947
+ ?universal_newline: bool,
948
+ ?crlf_newline: bool,
949
+ ?cr_newline: bool,
950
+ ?xml: :text | :attr
951
+ ) -> conversion_path
952
+
953
+ public
954
+
955
+ # <!--
956
+ # rdoc-file=transcode.c
957
+ # - ec == other -> true or false
958
+ # -->
959
+ #
960
+ def ==: (self) -> bool
961
+
962
+ # <!--
963
+ # rdoc-file=transcode.c
964
+ # - ec.convert(source_string) -> destination_string
965
+ # -->
966
+ # Convert source_string and return destination_string.
967
+ #
968
+ # source_string is assumed to be a part of the source, i.e. :partial_input=>true is
969
+ # specified internally. The finish method should be used last.
970
+ #
971
+ # ec = Encoding::Converter.new("utf-8", "euc-jp")
972
+ # puts ec.convert("\u3042").dump #=> "\xA4\xA2"
973
+ # puts ec.finish.dump #=> ""
974
+ #
975
+ # ec = Encoding::Converter.new("euc-jp", "utf-8")
976
+ # puts ec.convert("\xA4").dump #=> ""
977
+ # puts ec.convert("\xA2").dump #=> "\xE3\x81\x82"
978
+ # puts ec.finish.dump #=> ""
979
+ #
980
+ # ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
981
+ # puts ec.convert("\xE3").dump #=> "".force_encoding("ISO-2022-JP")
982
+ # puts ec.convert("\x81").dump #=> "".force_encoding("ISO-2022-JP")
983
+ # puts ec.convert("\x82").dump #=> "\e$B$\"".force_encoding("ISO-2022-JP")
984
+ # puts ec.finish.dump #=> "\e(B".force_encoding("ISO-2022-JP")
985
+ #
986
+ # If a conversion error occurs, Encoding::UndefinedConversionError or
987
+ # Encoding::InvalidByteSequenceError is raised. Encoding::Converter#convert
988
+ # doesn't supply methods to recover or restart from these exceptions. When you
989
+ # want to handle these conversion errors, use
990
+ # Encoding::Converter#primitive_convert.
991
+ #
992
+ def convert: (String source) -> String
993
+
994
+ # <!--
995
+ # rdoc-file=transcode.c
996
+ # - ec.convpath -> ary
997
+ # -->
998
+ # Returns the conversion path of ec.
999
+ #
1000
+ # The result is an array of conversions.
1001
+ #
1002
+ # ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP", crlf_newline: true)
1003
+ # p ec.convpath
1004
+ # #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
1005
+ # # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
1006
+ # # "crlf_newline"]
1007
+ #
1008
+ # Each element of the array is a pair of encodings or a string. A pair means an
1009
+ # encoding conversion. A string means a decorator.
1010
+ #
1011
+ # In the above example, [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>] means a
1012
+ # converter from ISO-8859-1 to UTF-8. "crlf_newline" means newline converter
1013
+ # from LF to CRLF.
1014
+ #
1015
+ def convpath: () -> conversion_path
1016
+
1017
+ # <!--
1018
+ # rdoc-file=transcode.c
1019
+ # - ec.destination_encoding -> encoding
1020
+ # -->
1021
+ # Returns the destination encoding as an Encoding object.
1022
+ #
1023
+ def destination_encoding: () -> Encoding
1024
+
1025
+ # <!--
1026
+ # rdoc-file=transcode.c
1027
+ # - ec.finish -> string
1028
+ # -->
1029
+ # Finishes the converter. It returns the last part of the converted string.
1030
+ #
1031
+ # ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
1032
+ # p ec.convert("\u3042") #=> "\e$B$\""
1033
+ # p ec.finish #=> "\e(B"
1034
+ #
1035
+ def finish: () -> String
1036
+
1037
+ # <!--
1038
+ # rdoc-file=transcode.c
1039
+ # - ec.insert_output(string) -> nil
1040
+ # -->
1041
+ # Inserts string into the encoding converter. The string will be converted to
1042
+ # the destination encoding and output on later conversions.
1043
+ #
1044
+ # If the destination encoding is stateful, string is converted according to the
1045
+ # state and the state is updated.
1046
+ #
1047
+ # This method should be used only when a conversion error occurs.
1048
+ #
1049
+ # ec = Encoding::Converter.new("utf-8", "iso-8859-1")
1050
+ # src = "HIRAGANA LETTER A is \u{3042}."
1051
+ # dst = ""
1052
+ # p ec.primitive_convert(src, dst) #=> :undefined_conversion
1053
+ # puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is ", "."]
1054
+ # ec.insert_output("<err>")
1055
+ # p ec.primitive_convert(src, dst) #=> :finished
1056
+ # puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is <err>.", ""]
1057
+ #
1058
+ # ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
1059
+ # src = "\u{306F 3041 3068 2661 3002}" # U+2661 is not representable in iso-2022-jp
1060
+ # dst = ""
1061
+ # p ec.primitive_convert(src, dst) #=> :undefined_conversion
1062
+ # puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H".force_encoding("ISO-2022-JP"), "\xE3\x80\x82"]
1063
+ # ec.insert_output "?" # state change required to output "?".
1064
+ # p ec.primitive_convert(src, dst) #=> :finished
1065
+ # puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H\e(B?\e$B!#\e(B".force_encoding("ISO-2022-JP"), ""]
1066
+ #
1067
+ def insert_output: (String) -> nil
1068
+
1069
+ # <!--
1070
+ # rdoc-file=transcode.c
1071
+ # - ec.inspect -> string
1072
+ # -->
1073
+ # Returns a printable version of *ec*
1074
+ #
1075
+ # ec = Encoding::Converter.new("iso-8859-1", "utf-8")
1076
+ # puts ec.inspect #=> #<Encoding::Converter: ISO-8859-1 to UTF-8>
1077
+ #
1078
+ def inspect: () -> String
1079
+
1080
+ # <!--
1081
+ # rdoc-file=transcode.c
1082
+ # - ec.last_error -> exception or nil
1083
+ # -->
1084
+ # Returns an exception object for the last conversion. Returns nil if the last
1085
+ # conversion did not produce an error.
1086
+ #
1087
+ # "error" means that Encoding::InvalidByteSequenceError and
1088
+ # Encoding::UndefinedConversionError for Encoding::Converter#convert and
1089
+ # :invalid_byte_sequence, :incomplete_input and :undefined_conversion for
1090
+ # Encoding::Converter#primitive_convert.
1091
+ #
1092
+ # ec = Encoding::Converter.new("utf-8", "iso-8859-1")
1093
+ # p ec.primitive_convert(src="\xf1abcd", dst="") #=> :invalid_byte_sequence
1094
+ # p ec.last_error #=> #<Encoding::InvalidByteSequenceError: "\xF1" followed by "a" on UTF-8>
1095
+ # p ec.primitive_convert(src, dst, nil, 1) #=> :destination_buffer_full
1096
+ # p ec.last_error #=> nil
1097
+ #
1098
+ def last_error: () -> Encoding::InvalidByteSequenceError?
1099
+ | () -> Encoding::UndefinedConversionError?
1100
+
1101
+ # <!--
1102
+ # rdoc-file=transcode.c
1103
+ # - ec.primitive_convert(source_buffer, destination_buffer) -> symbol
1104
+ # - ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset) -> symbol
1105
+ # - ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol
1106
+ # - ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, opt) -> symbol
1107
+ # -->
1108
+ # possible opt elements:
1109
+ # hash form:
1110
+ # :partial_input => true # source buffer may be part of larger source
1111
+ # :after_output => true # stop conversion after output before input
1112
+ # integer form:
1113
+ # Encoding::Converter::PARTIAL_INPUT
1114
+ # Encoding::Converter::AFTER_OUTPUT
1115
+ #
1116
+ # possible results:
1117
+ # :invalid_byte_sequence
1118
+ # :incomplete_input
1119
+ # :undefined_conversion
1120
+ # :after_output
1121
+ # :destination_buffer_full
1122
+ # :source_buffer_empty
1123
+ # :finished
1124
+ #
1125
+ # primitive_convert converts source_buffer into destination_buffer.
1126
+ #
1127
+ # source_buffer should be a string or nil. nil means an empty string.
1128
+ #
1129
+ # destination_buffer should be a string.
1130
+ #
1131
+ # destination_byteoffset should be an integer or nil. nil means the end of
1132
+ # destination_buffer. If it is omitted, nil is assumed.
1133
+ #
1134
+ # destination_bytesize should be an integer or nil. nil means unlimited. If it
1135
+ # is omitted, nil is assumed.
1136
+ #
1137
+ # opt should be nil, a hash or an integer. nil means no flags. If it is omitted,
1138
+ # nil is assumed.
1139
+ #
1140
+ # primitive_convert converts the content of source_buffer from the beginning and
1141
+ # stores the result into destination_buffer.
1142
+ #
1143
+ # destination_byteoffset and destination_bytesize specify the region in which the
1144
+ # converted result is stored. destination_byteoffset specifies the start
1145
+ # position in destination_buffer in bytes. If destination_byteoffset is nil,
1146
+ # destination_buffer.bytesize is used for appending the result.
1147
+ # destination_bytesize specifies maximum number of bytes. If
1148
+ # destination_bytesize is nil, destination size is unlimited. After conversion,
1149
+ # destination_buffer is resized to destination_byteoffset + actually produced
1150
+ # number of bytes. Also destination_buffer's encoding is set to
1151
+ # destination_encoding.
1152
+ #
1153
+ # primitive_convert drops the converted part of source_buffer. The dropped part
1154
+ # is converted in destination_buffer or buffered in Encoding::Converter object.
1155
+ #
1156
+ # primitive_convert stops conversion when one of the following conditions is met.
1157
+ # * invalid byte sequence found in source buffer (:invalid_byte_sequence)
1158
+ # `primitive_errinfo` and `last_error` methods return the detail of the
1159
+ # error.
1160
+ # * unexpected end of source buffer (:incomplete_input) this occurs only when
1161
+ # :partial_input is not specified. `primitive_errinfo` and `last_error`
1162
+ # methods return the detail of the error.
1163
+ # * character not representable in output encoding (:undefined_conversion)
1164
+ # `primitive_errinfo` and `last_error` methods return the detail of the
1165
+ # error.
1166
+ # * after some output is generated, before input is done (:after_output) this
1167
+ # occurs only when :after_output is specified.
1168
+ # * destination buffer is full (:destination_buffer_full) this occurs only when
1169
+ # destination_bytesize is non-nil.
1170
+ # * source buffer is empty (:source_buffer_empty) this occurs only when
1171
+ # :partial_input is specified.
1172
+ # * conversion is finished (:finished)
1173
+ #
1174
+ #
1175
+ # example:
1176
+ # ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
1177
+ # ret = ec.primitive_convert(src="pi", dst="", nil, 100)
1178
+ # p [ret, src, dst] #=> [:finished, "", "\x00p\x00i"]
1179
+ #
1180
+ # ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
1181
+ # ret = ec.primitive_convert(src="pi", dst="", nil, 1)
1182
+ # p [ret, src, dst] #=> [:destination_buffer_full, "i", "\x00"]
1183
+ # ret = ec.primitive_convert(src, dst="", nil, 1)
1184
+ # p [ret, src, dst] #=> [:destination_buffer_full, "", "p"]
1185
+ # ret = ec.primitive_convert(src, dst="", nil, 1)
1186
+ # p [ret, src, dst] #=> [:destination_buffer_full, "", "\x00"]
1187
+ # ret = ec.primitive_convert(src, dst="", nil, 1)
1188
+ # p [ret, src, dst] #=> [:finished, "", "i"]
1189
+ #
1190
+ def primitive_convert: (
1191
+ String? source,
1192
+ String destination,
1193
+ ?Integer? destination_byteoffset,
1194
+ ?Integer? destination_bytesize,
1195
+ ?partial_input: bool,
1196
+ ?after_output: bool
1197
+ ) -> convert_result
1198
+ | (
1199
+ String? source,
1200
+ String destination,
1201
+ ?Integer? destination_byteoffset,
1202
+ ?Integer? destination_bytesize,
1203
+ ?Integer opt
1204
+ ) -> convert_result
1205
+
1206
+ # <!--
1207
+ # rdoc-file=transcode.c
1208
+ # - ec.primitive_errinfo -> array
1209
+ # -->
1210
+ # primitive_errinfo returns important information regarding the last error as a
1211
+ # 5-element array:
1212
+ #
1213
+ # [result, enc1, enc2, error_bytes, readagain_bytes]
1214
+ #
1215
+ # result is the last result of primitive_convert.
1216
+ #
1217
+ # Other elements are only meaningful when result is :invalid_byte_sequence,
1218
+ # :incomplete_input or :undefined_conversion.
1219
+ #
1220
+ # enc1 and enc2 indicate a conversion step as a pair of strings. For example, a
1221
+ # converter from EUC-JP to ISO-8859-1 converts a string as follows: EUC-JP ->
1222
+ # UTF-8 -> ISO-8859-1. So [enc1, enc2] is either ["EUC-JP", "UTF-8"] or
1223
+ # ["UTF-8", "ISO-8859-1"].
1224
+ #
1225
+ # error_bytes and readagain_bytes indicate the byte sequences which caused the
1226
+ # error. error_bytes is the discarded portion. readagain_bytes is the buffered portion
1227
+ # which is read again on next conversion.
1228
+ #
1229
+ # Example:
1230
+ #
1231
+ # # \xff is invalid as EUC-JP.
1232
+ # ec = Encoding::Converter.new("EUC-JP", "Shift_JIS")
1233
+ # ec.primitive_convert(src="\xff", dst="", nil, 10)
1234
+ # p ec.primitive_errinfo
1235
+ # #=> [:invalid_byte_sequence, "EUC-JP", "Shift_JIS", "\xFF", ""]
1236
+ #
1237
+ # # HIRAGANA LETTER A (\xa4\xa2 in EUC-JP) is not representable in ISO-8859-1.
1238
+ # # Since this error occurs in the UTF-8 to ISO-8859-1 conversion,
1239
+ # # error_bytes is HIRAGANA LETTER A in UTF-8 (\xE3\x81\x82).
1240
+ # ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
1241
+ # ec.primitive_convert(src="\xa4\xa2", dst="", nil, 10)
1242
+ # p ec.primitive_errinfo
1243
+ # #=> [:undefined_conversion, "UTF-8", "ISO-8859-1", "\xE3\x81\x82", ""]
1244
+ #
1245
+ # # partial character is invalid
1246
+ # ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
1247
+ # ec.primitive_convert(src="\xa4", dst="", nil, 10)
1248
+ # p ec.primitive_errinfo
1249
+ # #=> [:incomplete_input, "EUC-JP", "UTF-8", "\xA4", ""]
1250
+ #
1251
+ # # Encoding::Converter::PARTIAL_INPUT prevents invalid errors by
1252
+ # # partial characters.
1253
+ # ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
1254
+ # ec.primitive_convert(src="\xa4", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
1255
+ # p ec.primitive_errinfo
1256
+ # #=> [:source_buffer_empty, nil, nil, nil, nil]
1257
+ #
1258
+ # # \xd8\x00\x00@ is invalid as UTF-16BE because
1259
+ # # no low surrogate after high surrogate (\xd8\x00).
1260
+ # # It is detected by the 3rd byte (\x00) which is part of the next character.
1261
+ # # So the high surrogate (\xd8\x00) is discarded and
1262
+ # # the 3rd byte is read again later.
1263
+ # # Since the byte is buffered in ec, it is dropped from src.
1264
+ # ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
1265
+ # ec.primitive_convert(src="\xd8\x00\x00@", dst="", nil, 10)
1266
+ # p ec.primitive_errinfo
1267
+ # #=> [:invalid_byte_sequence, "UTF-16BE", "UTF-8", "\xD8\x00", "\x00"]
1268
+ # p src
1269
+ # #=> "@"
1270
+ #
1271
+ # # Similar to UTF-16BE, \x00\xd8@\x00 is invalid as UTF-16LE.
1272
+ # # The problem is detected by 4th byte.
1273
+ # ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
1274
+ # ec.primitive_convert(src="\x00\xd8@\x00", dst="", nil, 10)
1275
+ # p ec.primitive_errinfo
1276
+ # #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "@\x00"]
1277
+ # p src
1278
+ # #=> ""
1279
+ #
1280
+ def primitive_errinfo: () -> [convert_result, String?, String?, String?, String?]
1281
+
1282
+ # <!--
1283
+ # rdoc-file=transcode.c
1284
+ # - ec.putback -> string
1285
+ # - ec.putback(max_numbytes) -> string
1286
+ # -->
1287
+ # Put back the bytes which will be converted.
1288
+ #
1289
+ # The bytes are caused by an invalid_byte_sequence error. When an
1290
+ # invalid_byte_sequence error occurs, some bytes are discarded and some bytes are
1291
+ # buffered to be converted later. The latter bytes can be put back. It can be
1292
+ # observed by Encoding::InvalidByteSequenceError#readagain_bytes and
1293
+ # Encoding::Converter#primitive_errinfo.
1294
+ #
1295
+ # ec = Encoding::Converter.new("utf-16le", "iso-8859-1")
1296
+ # src = "\x00\xd8\x61\x00"
1297
+ # dst = ""
1298
+ # p ec.primitive_convert(src, dst) #=> :invalid_byte_sequence
1299
+ # p ec.primitive_errinfo #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "a\x00"]
1300
+ # p ec.putback #=> "a\x00"
1301
+ # p ec.putback #=> "" # no more bytes to put back
1302
+ #
1303
+ def putback: (?Integer max_numbytes) -> String
1304
+
1305
+ # <!--
1306
+ # rdoc-file=transcode.c
1307
+ # - ec.replacement -> string
1308
+ # -->
1309
+ # Returns the replacement string.
1310
+ #
1311
+ # ec = Encoding::Converter.new("euc-jp", "us-ascii")
1312
+ # p ec.replacement #=> "?"
1313
+ #
1314
+ # ec = Encoding::Converter.new("euc-jp", "utf-8")
1315
+ # p ec.replacement #=> "\uFFFD"
1316
+ #
1317
+ def replacement: () -> String
1318
+
1319
+ # <!--
1320
+ # rdoc-file=transcode.c
1321
+ # - ec.replacement = string
1322
+ # -->
1323
+ # Sets the replacement string.
1324
+ #
1325
+ # ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace)
1326
+ # ec.replacement = "<undef>"
1327
+ # p ec.convert("a \u3042 b") #=> "a <undef> b"
1328
+ #
1329
+ def replacement=: (String str) -> String
1330
+
1331
+ # <!--
1332
+ # rdoc-file=transcode.c
1333
+ # - ec.source_encoding -> encoding
1334
+ # -->
1335
+ # Returns the source encoding as an Encoding object.
1336
+ #
1337
+ def source_encoding: () -> Encoding
1338
+
1339
+ private
1340
+
1341
+ # <!--
1342
+ # rdoc-file=transcode.c
1343
+ # - Encoding::Converter.new(source_encoding, destination_encoding)
1344
+ # - Encoding::Converter.new(source_encoding, destination_encoding, opt)
1345
+ # - Encoding::Converter.new(convpath)
1346
+ # -->
1347
+ # possible options elements:
1348
+ # hash form:
1349
+ # :invalid => nil # raise error on invalid byte sequence (default)
1350
+ # :invalid => :replace # replace invalid byte sequence
1351
+ # :undef => nil # raise error on undefined conversion (default)
1352
+ # :undef => :replace # replace undefined conversion
1353
+ # :replace => string # replacement string ("?" or "\uFFFD" if not specified)
1354
+ # :newline => :universal # decorator for converting CRLF and CR to LF
1355
+ # :newline => :crlf # decorator for converting LF to CRLF
1356
+ # :newline => :cr # decorator for converting LF to CR
1357
+ # :universal_newline => true # decorator for converting CRLF and CR to LF
1358
+ # :crlf_newline => true # decorator for converting LF to CRLF
1359
+ # :cr_newline => true # decorator for converting LF to CR
1360
+ # :xml => :text # escape as XML CharData.
1361
+ # :xml => :attr # escape as XML AttValue
1362
+ # integer form:
1363
+ # Encoding::Converter::INVALID_REPLACE
1364
+ # Encoding::Converter::UNDEF_REPLACE
1365
+ # Encoding::Converter::UNDEF_HEX_CHARREF
1366
+ # Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR
1367
+ # Encoding::Converter::CRLF_NEWLINE_DECORATOR
1368
+ # Encoding::Converter::CR_NEWLINE_DECORATOR
1369
+ # Encoding::Converter::XML_TEXT_DECORATOR
1370
+ # Encoding::Converter::XML_ATTR_CONTENT_DECORATOR
1371
+ # Encoding::Converter::XML_ATTR_QUOTE_DECORATOR
1372
+ #
1373
+ # Encoding::Converter.new creates an instance of Encoding::Converter.
1374
+ #
1375
+ # Source_encoding and destination_encoding should be a string or Encoding
1376
+ # object.
1377
+ #
1378
+ # opt should be nil, a hash or an integer.
1379
+ #
1380
+ # convpath should be an array. convpath may contain
1381
+ # * two-element arrays which contain encodings or encoding names, or
1382
+ # * strings representing decorator names.
1383
+ #
1384
+ #
1385
+ # Encoding::Converter.new optionally takes an option. The option should be a
1386
+ # hash or an integer. The option hash can contain :invalid => nil, etc. The
1387
+ # option integer should be logical-or of constants such as
1388
+ # Encoding::Converter::INVALID_REPLACE, etc.
1389
+ #
1390
+ # :invalid => nil
1391
+ # : Raise error on invalid byte sequence. This is a default behavior.
1392
+ # :invalid => :replace
1393
+ # : Replace invalid byte sequence by replacement string.
1394
+ # :undef => nil
1395
+ # : Raise an error if a character in source_encoding is not defined in
1396
+ # destination_encoding. This is a default behavior.
1397
+ # :undef => :replace
1398
+ # : Replace undefined character in destination_encoding with replacement
1399
+ # string.
1400
+ # :replace => string
1401
+ # : Specify the replacement string. If not specified, "\uFFFD" is used for
1402
+ # Unicode encodings and "?" for others.
1403
+ # :universal_newline => true
1404
+ # : Convert CRLF and CR to LF.
1405
+ # :crlf_newline => true
1406
+ # : Convert LF to CRLF.
1407
+ # :cr_newline => true
1408
+ # : Convert LF to CR.
1409
+ # :xml => :text
1410
+ # : Escape as XML CharData. This form can be used as an HTML 4.0 #PCDATA.
1411
+ # * '&' -> '&amp;'
1412
+ # * '<' -> '&lt;'
1413
+ # * '>' -> '&gt;'
1414
+ # * undefined characters in destination_encoding -> hexadecimal CharRef
1415
+ # such as &#xHH;
1416
+ #
1417
+ # :xml => :attr
1418
+ # : Escape as XML AttValue. The converted result is quoted as "...". This form
1419
+ # can be used as an HTML 4.0 attribute value.
1420
+ # * '&' -> '&amp;'
1421
+ # * '<' -> '&lt;'
1422
+ # * '>' -> '&gt;'
1423
+ # * '"' -> '&quot;'
1424
+ # * undefined characters in destination_encoding -> hexadecimal CharRef
1425
+ # such as &#xHH;
1426
+ #
1427
+ #
1428
+ #
1429
+ # Examples:
1430
+ # # UTF-16BE to UTF-8
1431
+ # ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
1432
+ #
1433
+ # # Usually, decorators such as newline conversion are inserted last.
1434
+ # ec = Encoding::Converter.new("UTF-16BE", "UTF-8", :universal_newline => true)
1435
+ # p ec.convpath #=> [[#<Encoding:UTF-16BE>, #<Encoding:UTF-8>],
1436
+ # # "universal_newline"]
1437
+ #
1438
+ # # But, if the last encoding is ASCII incompatible,
1439
+ # # decorators are inserted before the last conversion.
1440
+ # ec = Encoding::Converter.new("UTF-8", "UTF-16BE", :crlf_newline => true)
1441
+ # p ec.convpath #=> ["crlf_newline",
1442
+ # # [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
1443
+ #
1444
+ # # Conversion path can be specified directly.
1445
+ # ec = Encoding::Converter.new(["universal_newline", ["EUC-JP", "UTF-8"], ["UTF-8", "UTF-16BE"]])
1446
+ # p ec.convpath #=> ["universal_newline",
1447
+ # # [#<Encoding:EUC-JP>, #<Encoding:UTF-8>],
1448
+ # # [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
1449
+ #
1450
+ def initialize: (encoding source, encoding destination) -> void
1451
+ | (encoding source, encoding destination,
1452
+ ?invalid: :replace | nil,
1453
+ ?undef: :replace | nil,
1454
+ ?replace: String,
1455
+ ?newline: :universal | :crlf | :cr,
1456
+ ?universal_newline: bool,
1457
+ ?crlf_newline: bool,
1458
+ ?cr_newline: bool,
1459
+ ?xml: :text | :attr
1460
+ ) -> void
1461
+ | (encoding source, encoding destination, Integer opts) -> void
1462
+ | (conversion_path convpath) -> void
882
1463
  end
883
1464
 
884
1465
  # <!-- rdoc-file=transcode.c -->
@@ -995,10 +1576,171 @@ end
995
1576
  # contains a byte invalid for either the source or target encoding.
996
1577
  #
997
1578
  class Encoding::InvalidByteSequenceError < EncodingError
1579
+ public
1580
+
1581
+ # <!--
1582
+ # rdoc-file=transcode.c
1583
+ # - ecerr.destination_encoding -> encoding
1584
+ # -->
1585
+ # Returns the destination encoding as an encoding object.
1586
+ #
1587
+ def destination_encoding: () -> Encoding
1588
+
1589
+ # <!--
1590
+ # rdoc-file=transcode.c
1591
+ # - ecerr.destination_encoding_name -> string
1592
+ # -->
1593
+ # Returns the destination encoding name as a string.
1594
+ #
1595
+ def destination_encoding_name: () -> String
1596
+
1597
+ # <!--
1598
+ # rdoc-file=transcode.c
1599
+ # - ecerr.error_bytes -> string
1600
+ # -->
1601
+ # Returns the discarded bytes when Encoding::InvalidByteSequenceError occurs.
1602
+ #
1603
+ # ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
1604
+ # begin
1605
+ # ec.convert("abc\xA1\xFFdef")
1606
+ # rescue Encoding::InvalidByteSequenceError
1607
+ # p $! #=> #<Encoding::InvalidByteSequenceError: "\xA1" followed by "\xFF" on EUC-JP>
1608
+ # puts $!.error_bytes.dump #=> "\xA1"
1609
+ # puts $!.readagain_bytes.dump #=> "\xFF"
1610
+ # end
1611
+ #
1612
+ def error_bytes: () -> String
1613
+
1614
+ # <!--
1615
+ # rdoc-file=transcode.c
1616
+ # - ecerr.incomplete_input? -> true or false
1617
+ # -->
1618
+ # Returns true if the invalid byte sequence error is caused by premature end of
1619
+ # string.
1620
+ #
1621
+ # ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
1622
+ #
1623
+ # begin
1624
+ # ec.convert("abc\xA1z")
1625
+ # rescue Encoding::InvalidByteSequenceError
1626
+ # p $! #=> #<Encoding::InvalidByteSequenceError: "\xA1" followed by "z" on EUC-JP>
1627
+ # p $!.incomplete_input? #=> false
1628
+ # end
1629
+ #
1630
+ # begin
1631
+ # ec.convert("abc\xA1")
1632
+ # ec.finish
1633
+ # rescue Encoding::InvalidByteSequenceError
1634
+ # p $! #=> #<Encoding::InvalidByteSequenceError: incomplete "\xA1" on EUC-JP>
1635
+ # p $!.incomplete_input? #=> true
1636
+ # end
1637
+ #
1638
+ def incomplete_input?: () -> bool
1639
+
1640
+ # <!--
1641
+ # rdoc-file=transcode.c
1642
+ # - ecerr.readagain_bytes -> string
1643
+ # -->
1644
+ # Returns the bytes to be read again when Encoding::InvalidByteSequenceError
1645
+ # occurs.
1646
+ #
1647
+ def readagain_bytes: () -> String
1648
+
1649
+ # <!--
1650
+ # rdoc-file=transcode.c
1651
+ # - ecerr.source_encoding -> encoding
1652
+ # -->
1653
+ # Returns the source encoding as an encoding object.
1654
+ #
1655
+ # Note that the result may not be equal to the source encoding of the encoding
1656
+ # converter if the conversion has multiple steps.
1657
+ #
1658
+ # ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP") # ISO-8859-1 -> UTF-8 -> EUC-JP
1659
+ # begin
1660
+ # ec.convert("\xa0") # NO-BREAK SPACE, which is available in UTF-8 but not in EUC-JP.
1661
+ # rescue Encoding::UndefinedConversionError
1662
+ # p $!.source_encoding #=> #<Encoding:UTF-8>
1663
+ # p $!.destination_encoding #=> #<Encoding:EUC-JP>
1664
+ # p $!.source_encoding_name #=> "UTF-8"
1665
+ # p $!.destination_encoding_name #=> "EUC-JP"
1666
+ # end
1667
+ #
1668
+ def source_encoding: () -> Encoding
1669
+
1670
+ # <!--
1671
+ # rdoc-file=transcode.c
1672
+ # - ecerr.source_encoding_name -> string
1673
+ # -->
1674
+ # Returns the source encoding name as a string.
1675
+ #
1676
+ def source_encoding_name: () -> String
998
1677
  end
999
1678
 
1000
1679
  # <!-- rdoc-file=transcode.c -->
1001
1680
  # Raised by Encoding and String methods when a transcoding operation fails.
1002
1681
  #
1003
1682
  class Encoding::UndefinedConversionError < EncodingError
1683
+ public
1684
+
1685
+ # <!--
1686
+ # rdoc-file=transcode.c
1687
+ # - ecerr.destination_encoding -> encoding
1688
+ # -->
1689
+ # Returns the destination encoding as an encoding object.
1690
+ #
1691
+ def destination_encoding: () -> Encoding
1692
+
1693
+ # <!--
1694
+ # rdoc-file=transcode.c
1695
+ # - ecerr.destination_encoding_name -> string
1696
+ # -->
1697
+ # Returns the destination encoding name as a string.
1698
+ #
1699
+ def destination_encoding_name: () -> String
1700
+
1701
+ # <!--
1702
+ # rdoc-file=transcode.c
1703
+ # - ecerr.error_char -> string
1704
+ # -->
1705
+ # Returns the one-character string which causes
1706
+ # Encoding::UndefinedConversionError.
1707
+ #
1708
+ # ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP")
1709
+ # begin
1710
+ # ec.convert("\xa0")
1711
+ # rescue Encoding::UndefinedConversionError
1712
+ # puts $!.error_char.dump #=> "\xC2\xA0"
1713
+ # p $!.error_char.encoding #=> #<Encoding:UTF-8>
1714
+ # end
1715
+ #
1716
+ def error_char: () -> String
1717
+
1718
+ # <!--
1719
+ # rdoc-file=transcode.c
1720
+ # - ecerr.source_encoding -> encoding
1721
+ # -->
1722
+ # Returns the source encoding as an encoding object.
1723
+ #
1724
+ # Note that the result may not be equal to the source encoding of the encoding
1725
+ # converter if the conversion has multiple steps.
1726
+ #
1727
+ # ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP") # ISO-8859-1 -> UTF-8 -> EUC-JP
1728
+ # begin
1729
+ # ec.convert("\xa0") # NO-BREAK SPACE, which is available in UTF-8 but not in EUC-JP.
1730
+ # rescue Encoding::UndefinedConversionError
1731
+ # p $!.source_encoding #=> #<Encoding:UTF-8>
1732
+ # p $!.destination_encoding #=> #<Encoding:EUC-JP>
1733
+ # p $!.source_encoding_name #=> "UTF-8"
1734
+ # p $!.destination_encoding_name #=> "EUC-JP"
1735
+ # end
1736
+ #
1737
+ def source_encoding: () -> Encoding
1738
+
1739
+ # <!--
1740
+ # rdoc-file=transcode.c
1741
+ # - ecerr.source_encoding_name -> string
1742
+ # -->
1743
+ # Returns the source encoding name as a string.
1744
+ #
1745
+ def source_encoding_name: () -> String
1004
1746
  end