exiftool-vendored.exe 12.80.0 → 12.82.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253) hide show
  1. package/README.md +6 -0
  2. package/bin/exiftool.exe +0 -0
  3. package/bin/exiftool_files/Changes +44 -0
  4. package/bin/exiftool_files/Licenses_Strawberry_Perl.zip +0 -0
  5. package/bin/exiftool_files/README +3 -2
  6. package/bin/exiftool_files/exiftool.pl +36 -14
  7. package/bin/exiftool_files/lib/Archive/Zip/Archive.pm +399 -65
  8. package/bin/exiftool_files/lib/Archive/Zip/DirectoryMember.pm +1 -1
  9. package/bin/exiftool_files/lib/Archive/Zip/FileMember.pm +1 -1
  10. package/bin/exiftool_files/lib/Archive/Zip/Member.pm +499 -195
  11. package/bin/exiftool_files/lib/Archive/Zip/NewFileMember.pm +1 -1
  12. package/bin/exiftool_files/lib/Archive/Zip/StringMember.pm +2 -2
  13. package/bin/exiftool_files/lib/Archive/Zip/ZipFileMember.pm +79 -20
  14. package/bin/exiftool_files/lib/Archive/Zip.pm +179 -29
  15. package/bin/exiftool_files/lib/AutoLoader.pm +453 -0
  16. package/bin/exiftool_files/lib/B/Deparse.pm +209 -137
  17. package/bin/exiftool_files/lib/B.pm +1 -1
  18. package/bin/exiftool_files/lib/Benchmark.pm +1123 -0
  19. package/bin/exiftool_files/lib/Class/Struct.pm +2 -2
  20. package/bin/exiftool_files/lib/Compress/Raw/Bzip2.pm +14 -9
  21. package/bin/exiftool_files/lib/Compress/Raw/Lzma.pm +982 -0
  22. package/bin/exiftool_files/lib/Compress/Raw/Zlib.pm +91 -86
  23. package/bin/exiftool_files/lib/Compress/Zlib.pm +105 -100
  24. package/bin/exiftool_files/lib/Config.pm +9 -9
  25. package/bin/exiftool_files/lib/Config_heavy.pl +36 -33
  26. package/bin/exiftool_files/lib/CryptX.pm +2 -82
  27. package/bin/exiftool_files/lib/Data/Dumper.pm +2 -2
  28. package/bin/exiftool_files/lib/Digest/MD5.pm +12 -9
  29. package/bin/exiftool_files/lib/Digest/Perl/MD5.pm +1 -191
  30. package/bin/exiftool_files/lib/Digest/base.pm +26 -20
  31. package/bin/exiftool_files/lib/DynaLoader.pm +7 -4
  32. package/bin/exiftool_files/lib/Encode.pm +3 -3
  33. package/bin/exiftool_files/lib/Errno.pm +13 -13
  34. package/bin/exiftool_files/lib/Exporter/Heavy.pm +2 -2
  35. package/bin/exiftool_files/lib/Exporter.pm +1 -1
  36. package/bin/exiftool_files/lib/ExtUtils/Command/MM.pm +323 -0
  37. package/bin/exiftool_files/lib/ExtUtils/Command.pm +382 -0
  38. package/bin/exiftool_files/lib/File/Find.pm +1 -1
  39. package/bin/exiftool_files/lib/File/Glob.pm +1 -8
  40. package/bin/exiftool_files/lib/File/GlobMapper.pm +2 -2
  41. package/bin/exiftool_files/lib/File/HomeDir/Darwin/Carbon.pm +2 -40
  42. package/bin/exiftool_files/lib/File/HomeDir/Darwin/Cocoa.pm +2 -34
  43. package/bin/exiftool_files/lib/File/HomeDir/Darwin.pm +2 -28
  44. package/bin/exiftool_files/lib/File/HomeDir/Driver.pm +2 -35
  45. package/bin/exiftool_files/lib/File/HomeDir/FreeDesktop.pm +2 -62
  46. package/bin/exiftool_files/lib/File/HomeDir/MacOS9.pm +2 -53
  47. package/bin/exiftool_files/lib/File/HomeDir/Test.pm +2 -43
  48. package/bin/exiftool_files/lib/File/HomeDir/Unix.pm +2 -53
  49. package/bin/exiftool_files/lib/File/HomeDir/Windows.pm +2 -69
  50. package/bin/exiftool_files/lib/File/HomeDir.pm +5 -416
  51. package/bin/exiftool_files/lib/File/Path.pm +3 -3
  52. package/bin/exiftool_files/lib/File/Spec/Win32.pm +2 -2
  53. package/bin/exiftool_files/lib/File/Temp.pm +70 -35
  54. package/bin/exiftool_files/lib/File/Which.pm +1 -240
  55. package/bin/exiftool_files/lib/File/stat.pm +3 -2
  56. package/bin/exiftool_files/lib/IO/Compress/Adapter/Bzip2.pm +16 -17
  57. package/bin/exiftool_files/lib/IO/Compress/Adapter/Deflate.pm +19 -20
  58. package/bin/exiftool_files/lib/IO/Compress/Base/Common.pm +5 -5
  59. package/bin/exiftool_files/lib/IO/Compress/Base.pm +35 -26
  60. package/bin/exiftool_files/lib/IO/Compress/Brotli.pm +159 -0
  61. package/bin/exiftool_files/lib/IO/Compress/Bzip2.pm +50 -25
  62. package/bin/exiftool_files/lib/IO/Compress/Gzip/Constants.pm +6 -6
  63. package/bin/exiftool_files/lib/IO/Compress/Gzip.pm +58 -32
  64. package/bin/exiftool_files/lib/IO/Compress/RawDeflate.pm +63 -38
  65. package/bin/exiftool_files/lib/IO/Compress/Zlib/Extra.pm +20 -20
  66. package/bin/exiftool_files/lib/IO/Dir.pm +1 -1
  67. package/bin/exiftool_files/lib/IO/File.pm +1 -1
  68. package/bin/exiftool_files/lib/IO/Handle.pm +1 -21
  69. package/bin/exiftool_files/lib/IO/Pipe.pm +1 -1
  70. package/bin/exiftool_files/lib/IO/Seekable.pm +1 -1
  71. package/bin/exiftool_files/lib/IO/Select.pm +16 -2
  72. package/bin/exiftool_files/lib/IO/Socket/INET.pm +14 -9
  73. package/bin/exiftool_files/lib/IO/Socket/UNIX.pm +17 -1
  74. package/bin/exiftool_files/lib/IO/Socket.pm +474 -126
  75. package/bin/exiftool_files/lib/IO/String.pm +425 -0
  76. package/bin/exiftool_files/lib/IO/Uncompress/Adapter/Inflate.pm +13 -14
  77. package/bin/exiftool_files/lib/IO/Uncompress/Base.pm +142 -132
  78. package/bin/exiftool_files/lib/IO/Uncompress/Brotli.pm +119 -0
  79. package/bin/exiftool_files/lib/IO/Uncompress/Gunzip.pm +43 -37
  80. package/bin/exiftool_files/lib/IO/Uncompress/RawInflate.pm +49 -43
  81. package/bin/exiftool_files/lib/IO.pm +2 -2
  82. package/bin/exiftool_files/lib/Image/ExifTool/BuildTagLookup.pm +44 -31
  83. package/bin/exiftool_files/lib/Image/ExifTool/CanonVRD.pm +2 -2
  84. package/bin/exiftool_files/lib/Image/ExifTool/FujiFilm.pm +20 -7
  85. package/bin/exiftool_files/lib/Image/ExifTool/GM.pm +543 -0
  86. package/bin/exiftool_files/lib/Image/ExifTool/Geolocation.pm +332 -149
  87. package/bin/exiftool_files/lib/Image/ExifTool/Geotag.pm +9 -4
  88. package/bin/exiftool_files/lib/Image/ExifTool/M2TS.pm +32 -4
  89. package/bin/exiftool_files/lib/Image/ExifTool/MakerNotes.pm +2 -2
  90. package/bin/exiftool_files/lib/Image/ExifTool/Microsoft.pm +1 -1
  91. package/bin/exiftool_files/lib/Image/ExifTool/Nikon.pm +331 -22
  92. package/bin/exiftool_files/lib/Image/ExifTool/NikonCustom.pm +55 -1
  93. package/bin/exiftool_files/lib/Image/ExifTool/Olympus.pm +1 -0
  94. package/bin/exiftool_files/lib/Image/ExifTool/OpenEXR.pm +21 -3
  95. package/bin/exiftool_files/lib/Image/ExifTool/PNG.pm +3 -3
  96. package/bin/exiftool_files/lib/Image/ExifTool/QuickTime.pm +40 -24
  97. package/bin/exiftool_files/lib/Image/ExifTool/QuickTimeStream.pl +61 -30
  98. package/bin/exiftool_files/lib/Image/ExifTool/README +2 -0
  99. package/bin/exiftool_files/lib/Image/ExifTool/Sony.pm +1 -1
  100. package/bin/exiftool_files/lib/Image/ExifTool/TagLookup.pm +4815 -4775
  101. package/bin/exiftool_files/lib/Image/ExifTool/TagNames.pod +931 -617
  102. package/bin/exiftool_files/lib/Image/ExifTool/WriteQuickTime.pl +30 -8
  103. package/bin/exiftool_files/lib/Image/ExifTool/Writer.pl +10 -4
  104. package/bin/exiftool_files/lib/Image/ExifTool/XMP.pm +4 -2
  105. package/bin/exiftool_files/lib/Image/ExifTool.pm +77 -41
  106. package/bin/exiftool_files/lib/Image/ExifTool.pod +24 -11
  107. package/bin/exiftool_files/lib/List/Util.pm +97 -8
  108. package/bin/exiftool_files/lib/MIME/Base64.pm +5 -5
  109. package/bin/exiftool_files/lib/MIME/Charset/_Compat.pm +106 -0
  110. package/bin/exiftool_files/lib/MIME/Charset.pm +1303 -0
  111. package/bin/exiftool_files/lib/Math/BigFloat.pm +444 -27
  112. package/bin/exiftool_files/lib/Math/BigInt/Calc.pm +296 -313
  113. package/bin/exiftool_files/lib/Math/BigInt/FastCalc.pm +1 -1
  114. package/bin/exiftool_files/lib/Math/BigInt/GMP.pm +2 -115
  115. package/bin/exiftool_files/lib/Math/BigInt/LTM.pm +2 -24
  116. package/bin/exiftool_files/lib/Math/BigInt/Lib.pm +61 -32
  117. package/bin/exiftool_files/lib/Math/BigInt.pm +292 -107
  118. package/bin/exiftool_files/lib/POSIX.pm +1 -1
  119. package/bin/exiftool_files/lib/PerlIO/scalar.pm +41 -0
  120. package/bin/exiftool_files/lib/PerlIO.pm +397 -0
  121. package/bin/exiftool_files/lib/Portable/CPAN.pm +94 -94
  122. package/bin/exiftool_files/lib/Portable/Config.pm +94 -94
  123. package/bin/exiftool_files/lib/Portable/FileSpec.pm +180 -180
  124. package/bin/exiftool_files/lib/Portable/HomeDir.pm +110 -110
  125. package/bin/exiftool_files/lib/Portable/LoadYaml.pm +430 -430
  126. package/bin/exiftool_files/lib/Portable/minicpan.pm +55 -55
  127. package/bin/exiftool_files/lib/Portable.pm +246 -320
  128. package/bin/exiftool_files/lib/Scalar/Util.pm +9 -4
  129. package/bin/exiftool_files/lib/Socket.pm +16 -12
  130. package/bin/exiftool_files/lib/Storable.pm +1444 -1441
  131. package/bin/exiftool_files/lib/TAP/Base.pm +133 -0
  132. package/bin/exiftool_files/lib/TAP/Formatter/Base.pm +467 -0
  133. package/bin/exiftool_files/lib/TAP/Formatter/Color.pm +116 -0
  134. package/bin/exiftool_files/lib/TAP/Formatter/Console/ParallelSession.pm +201 -0
  135. package/bin/exiftool_files/lib/TAP/Formatter/Console/Session.pm +205 -0
  136. package/bin/exiftool_files/lib/TAP/Formatter/Console.pm +100 -0
  137. package/bin/exiftool_files/lib/TAP/Formatter/File/Session.pm +95 -0
  138. package/bin/exiftool_files/lib/TAP/Formatter/File.pm +56 -0
  139. package/bin/exiftool_files/lib/TAP/Formatter/Session.pm +220 -0
  140. package/bin/exiftool_files/lib/TAP/Harness/Beyond.pod +426 -0
  141. package/bin/exiftool_files/lib/TAP/Harness/Env.pm +215 -0
  142. package/bin/exiftool_files/lib/TAP/Harness.pm +1054 -0
  143. package/bin/exiftool_files/lib/TAP/Object.pm +155 -0
  144. package/bin/exiftool_files/lib/TAP/Parser/Aggregator.pm +414 -0
  145. package/bin/exiftool_files/lib/TAP/Parser/Grammar.pm +584 -0
  146. package/bin/exiftool_files/lib/TAP/Parser/Iterator/Array.pm +100 -0
  147. package/bin/exiftool_files/lib/TAP/Parser/Iterator/Process.pm +378 -0
  148. package/bin/exiftool_files/lib/TAP/Parser/Iterator/Stream.pm +116 -0
  149. package/bin/exiftool_files/lib/TAP/Parser/Iterator.pm +162 -0
  150. package/bin/exiftool_files/lib/TAP/Parser/IteratorFactory.pm +339 -0
  151. package/bin/exiftool_files/lib/TAP/Parser/Multiplexer.pm +194 -0
  152. package/bin/exiftool_files/lib/TAP/Parser/Result/Bailout.pm +62 -0
  153. package/bin/exiftool_files/lib/TAP/Parser/Result/Comment.pm +60 -0
  154. package/bin/exiftool_files/lib/TAP/Parser/Result/Plan.pm +119 -0
  155. package/bin/exiftool_files/lib/TAP/Parser/Result/Pragma.pm +62 -0
  156. package/bin/exiftool_files/lib/TAP/Parser/Result/Test.pm +271 -0
  157. package/bin/exiftool_files/lib/TAP/Parser/Result/Unknown.pm +48 -0
  158. package/bin/exiftool_files/lib/TAP/Parser/Result/Version.pm +62 -0
  159. package/bin/exiftool_files/lib/TAP/Parser/Result/YAML.pm +61 -0
  160. package/bin/exiftool_files/lib/TAP/Parser/Result.pm +297 -0
  161. package/bin/exiftool_files/lib/TAP/Parser/ResultFactory.pm +183 -0
  162. package/bin/exiftool_files/lib/TAP/Parser/Scheduler/Job.pm +127 -0
  163. package/bin/exiftool_files/lib/TAP/Parser/Scheduler/Spinner.pm +61 -0
  164. package/bin/exiftool_files/lib/TAP/Parser/Scheduler.pm +448 -0
  165. package/bin/exiftool_files/lib/TAP/Parser/Source.pm +381 -0
  166. package/bin/exiftool_files/lib/TAP/Parser/SourceHandler/Executable.pm +184 -0
  167. package/bin/exiftool_files/lib/TAP/Parser/SourceHandler/File.pm +136 -0
  168. package/bin/exiftool_files/lib/TAP/Parser/SourceHandler/Handle.pm +124 -0
  169. package/bin/exiftool_files/lib/TAP/Parser/SourceHandler/Perl.pm +370 -0
  170. package/bin/exiftool_files/lib/TAP/Parser/SourceHandler/RawTAP.pm +130 -0
  171. package/bin/exiftool_files/lib/TAP/Parser/SourceHandler.pm +191 -0
  172. package/bin/exiftool_files/lib/TAP/Parser/YAMLish/Reader.pm +332 -0
  173. package/bin/exiftool_files/lib/TAP/Parser/YAMLish/Writer.pm +254 -0
  174. package/bin/exiftool_files/lib/TAP/Parser.pm +1931 -0
  175. package/bin/exiftool_files/lib/Test/Builder/Formatter.pm +107 -0
  176. package/bin/exiftool_files/lib/Test/Builder/IO/Scalar.pm +659 -0
  177. package/bin/exiftool_files/lib/Test/Builder/Module.pm +182 -0
  178. package/bin/exiftool_files/lib/Test/Builder/Tester/Color.pm +51 -0
  179. package/bin/exiftool_files/lib/Test/Builder/Tester.pm +675 -0
  180. package/bin/exiftool_files/lib/Test/Builder/TodoDiag.pm +68 -0
  181. package/bin/exiftool_files/lib/Test/Builder.pm +2653 -0
  182. package/bin/exiftool_files/lib/Test/Harness.pm +618 -0
  183. package/bin/exiftool_files/lib/Test/More.pm +1997 -0
  184. package/bin/exiftool_files/lib/Test/Simple.pm +220 -0
  185. package/bin/exiftool_files/lib/Test/Tester/Capture.pm +241 -0
  186. package/bin/exiftool_files/lib/Test/Tester/CaptureRunner.pm +79 -0
  187. package/bin/exiftool_files/lib/Test/Tester/Delegate.pm +45 -0
  188. package/bin/exiftool_files/lib/Test/Tester.pm +695 -0
  189. package/bin/exiftool_files/lib/Test/Tutorial.pod +618 -0
  190. package/bin/exiftool_files/lib/Test/use/ok.pm +64 -0
  191. package/bin/exiftool_files/lib/Text/ParseWords.pm +303 -0
  192. package/bin/exiftool_files/lib/Tie/StdHandle.pm +2 -2
  193. package/bin/exiftool_files/lib/Time/HiRes.pm +73 -68
  194. package/bin/exiftool_files/lib/Time/Local.pm +82 -35
  195. package/bin/exiftool_files/lib/Time/Piece.pm +19 -4
  196. package/bin/exiftool_files/lib/Time/Seconds.pm +1 -1
  197. package/bin/exiftool_files/lib/UNIVERSAL.pm +203 -0
  198. package/bin/exiftool_files/lib/Unicode/GCString.pm +60 -0
  199. package/bin/exiftool_files/lib/Unicode/LineBreak/Constants.pm +68 -0
  200. package/bin/exiftool_files/lib/Unicode/LineBreak.pm +248 -0
  201. package/bin/exiftool_files/lib/Win32/API/Struct.pm +1 -177
  202. package/bin/exiftool_files/lib/Win32/API/Type.pm +1 -100
  203. package/bin/exiftool_files/lib/Win32/API.pm +1 -830
  204. package/bin/exiftool_files/lib/Win32/FindFile.pm +2 -123
  205. package/bin/exiftool_files/lib/Win32.pm +213 -89
  206. package/bin/exiftool_files/lib/Win32API/File.pm +1 -1
  207. package/bin/exiftool_files/lib/auto/B/B.xs.dll +0 -0
  208. package/bin/exiftool_files/lib/auto/Compress/Raw/Bzip2/Bzip2.xs.dll +0 -0
  209. package/bin/exiftool_files/lib/auto/Compress/Raw/Lzma/Lzma.xs.dll +0 -0
  210. package/bin/exiftool_files/lib/auto/Compress/Raw/Lzma/autosplit.ix +3 -0
  211. package/bin/exiftool_files/lib/auto/Compress/Raw/Zlib/Zlib.xs.dll +0 -0
  212. package/bin/exiftool_files/lib/auto/CryptX/CryptX.xs.dll +0 -0
  213. package/bin/exiftool_files/lib/auto/Cwd/Cwd.xs.dll +0 -0
  214. package/bin/exiftool_files/lib/auto/Data/Dumper/Dumper.xs.dll +0 -0
  215. package/bin/exiftool_files/lib/auto/Digest/MD5/MD5.xs.dll +0 -0
  216. package/bin/exiftool_files/lib/auto/Digest/SHA/SHA.xs.dll +0 -0
  217. package/bin/exiftool_files/lib/auto/Encode/Encode.xs.dll +0 -0
  218. package/bin/exiftool_files/lib/auto/Fcntl/Fcntl.xs.dll +0 -0
  219. package/bin/exiftool_files/lib/auto/File/Glob/Glob.xs.dll +0 -0
  220. package/bin/exiftool_files/lib/auto/IO/Compress/Brotli/Brotli.xs.dll +0 -0
  221. package/bin/exiftool_files/lib/auto/IO/IO.xs.dll +0 -0
  222. package/bin/exiftool_files/lib/auto/List/Util/Util.xs.dll +0 -0
  223. package/bin/exiftool_files/lib/auto/MIME/Base64/Base64.xs.dll +0 -0
  224. package/bin/exiftool_files/lib/auto/Math/BigInt/FastCalc/FastCalc.xs.dll +0 -0
  225. package/bin/exiftool_files/lib/auto/Math/BigInt/GMP/GMP.xs.dll +0 -0
  226. package/bin/exiftool_files/lib/auto/POSIX/POSIX.xs.dll +0 -0
  227. package/bin/exiftool_files/lib/auto/PerlIO/scalar/scalar.xs.dll +0 -0
  228. package/bin/exiftool_files/lib/auto/Socket/Socket.xs.dll +0 -0
  229. package/bin/exiftool_files/lib/auto/Storable/Storable.xs.dll +0 -0
  230. package/bin/exiftool_files/lib/auto/Time/HiRes/HiRes.xs.dll +0 -0
  231. package/bin/exiftool_files/lib/auto/Time/Piece/Piece.xs.dll +0 -0
  232. package/bin/exiftool_files/lib/auto/Unicode/LineBreak/LineBreak.xs.dll +0 -0
  233. package/bin/exiftool_files/lib/auto/Win32/API/API.xs.dll +0 -0
  234. package/bin/exiftool_files/lib/auto/Win32/FindFile/FindFile.xs.dll +0 -0
  235. package/bin/exiftool_files/lib/auto/Win32/Win32.xs.dll +0 -0
  236. package/bin/exiftool_files/lib/auto/Win32API/File/File.xs.dll +0 -0
  237. package/bin/exiftool_files/lib/auto/mro/mro.xs.dll +0 -0
  238. package/bin/exiftool_files/lib/auto/re/re.xs.dll +0 -0
  239. package/bin/exiftool_files/lib/feature.pm +49 -17
  240. package/bin/exiftool_files/lib/mro.pm +4 -20
  241. package/bin/exiftool_files/lib/overload.pm +15 -15
  242. package/bin/exiftool_files/lib/parent.pm +10 -2
  243. package/bin/exiftool_files/lib/re.pm +91 -33
  244. package/bin/exiftool_files/lib/warnings.pm +17 -6
  245. package/bin/exiftool_files/libgcc_s_seh-1.dll +0 -0
  246. package/bin/exiftool_files/liblzma-5__.dll +0 -0
  247. package/bin/exiftool_files/libstdc++-6.dll +0 -0
  248. package/bin/exiftool_files/libwinpthread-1.dll +0 -0
  249. package/bin/exiftool_files/perl.exe +0 -0
  250. package/bin/exiftool_files/perl532.dll +0 -0
  251. package/package.json +7 -5
  252. package/bin/exiftool_files/libgcc_s_dw2-1.dll +0 -0
  253. package/bin/exiftool_files/perl530.dll +0 -0
@@ -0,0 +1,1303 @@
1
+ #-*- perl -*-
2
+
3
+ package MIME::Charset;
4
+ use 5.005;
5
+
6
+ =head1 NAME
7
+
8
+ MIME::Charset - Charset Information for MIME
9
+
10
+ =head1 SYNOPSIS
11
+
12
+ use MIME::Charset:
13
+
14
+ $charset = MIME::Charset->new("euc-jp");
15
+
16
+ Getting charset information:
17
+
18
+ $benc = $charset->body_encoding; # e.g. "Q"
19
+ $cset = $charset->as_string; # e.g. "US-ASCII"
20
+ $henc = $charset->header_encoding; # e.g. "S"
21
+ $cset = $charset->output_charset; # e.g. "ISO-2022-JP"
22
+
23
+ Translating text data:
24
+
25
+ ($text, $charset, $encoding) =
26
+ $charset->header_encode(
27
+ "\xc9\xc2\xc5\xaa\xc0\xde\xc3\xef\xc5\xaa".
28
+ "\xc7\xd1\xca\xaa\xbd\xd0\xce\xcf\xb4\xef",
29
+ Charset => 'euc-jp');
30
+ # ...returns e.g. (<converted>, "ISO-2022-JP", "B").
31
+
32
+ ($text, $charset, $encoding) =
33
+ $charset->body_encode(
34
+ "Collectioneur path\xe9tiquement ".
35
+ "\xe9clectique de d\xe9chets",
36
+ Charset => 'latin1');
37
+ # ...returns e.g. (<original>, "ISO-8859-1", "QUOTED-PRINTABLE").
38
+
39
+ $len = $charset->encoded_header_len(
40
+ "Perl\xe8\xa8\x80\xe8\xaa\x9e",
41
+ Charset => 'utf-8',
42
+ Encoding => "b");
43
+ # ...returns e.g. 28.
44
+
45
+ Manipulating module defaults:
46
+
47
+ MIME::Charset::alias("csEUCKR", "euc-kr");
48
+ MIME::Charset::default("iso-8859-1");
49
+ MIME::Charset::fallback("us-ascii");
50
+
51
+ Non-OO functions (may be deprecated in near future):
52
+
53
+ use MIME::Charset qw(:info);
54
+
55
+ $benc = body_encoding("iso-8859-2"); # "Q"
56
+ $cset = canonical_charset("ANSI X3.4-1968"); # "US-ASCII"
57
+ $henc = header_encoding("utf-8"); # "S"
58
+ $cset = output_charset("shift_jis"); # "ISO-2022-JP"
59
+
60
+ use MIME::Charset qw(:trans);
61
+
62
+ ($text, $charset, $encoding) =
63
+ header_encode(
64
+ "\xc9\xc2\xc5\xaa\xc0\xde\xc3\xef\xc5\xaa".
65
+ "\xc7\xd1\xca\xaa\xbd\xd0\xce\xcf\xb4\xef",
66
+ "euc-jp");
67
+ # ...returns (<converted>, "ISO-2022-JP", "B");
68
+
69
+ ($text, $charset, $encoding) =
70
+ body_encode(
71
+ "Collectioneur path\xe9tiquement ".
72
+ "\xe9clectique de d\xe9chets",
73
+ "latin1");
74
+ # ...returns (<original>, "ISO-8859-1", "QUOTED-PRINTABLE");
75
+
76
+ $len = encoded_header_len(
77
+ "Perl\xe8\xa8\x80\xe8\xaa\x9e", "b", "utf-8"); # 28
78
+
79
+ =head1 DESCRIPTION
80
+
81
+ MIME::Charset provides information about character sets used for
82
+ MIME messages on Internet.
83
+
84
+ =head2 Definitions
85
+
86
+ The B<charset> is ``character set'' used in MIME to refer to a
87
+ method of converting a sequence of octets into a sequence of characters.
88
+ It includes both concepts of ``coded character set'' (CCS) and
89
+ ``character encoding scheme'' (CES) of ISO/IEC.
90
+
91
+ The B<encoding> is that used in MIME to refer to a method of representing
92
+ a body part or a header body as sequence(s) of printable US-ASCII
93
+ characters.
94
+
95
+ =cut
96
+
97
+ use strict;
98
+ use vars qw(@ISA $VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS $Config);
99
+ use Exporter;
100
+ @ISA = qw(Exporter);
101
+ @EXPORT = qw(body_encoding canonical_charset header_encoding output_charset
102
+ body_encode encoded_header_len header_encode);
103
+ @EXPORT_OK = qw(alias default fallback recommended);
104
+ %EXPORT_TAGS = (
105
+ "info" => [qw(body_encoding header_encoding
106
+ canonical_charset output_charset)],
107
+ "trans" =>[ qw(body_encode encoded_header_len
108
+ header_encode)],
109
+ );
110
+ use Carp qw(croak);
111
+
112
+ use constant USE_ENCODE => ($] >= 5.007003)? 'Encode': '';
113
+
114
+ my @ENCODE_SUBS = qw(FB_CROAK FB_PERLQQ FB_HTMLCREF FB_XMLCREF
115
+ is_utf8 resolve_alias);
116
+ if (USE_ENCODE) {
117
+ eval "use ".USE_ENCODE." \@ENCODE_SUBS;";
118
+ if ($@) { # Perl 5.7.3 + Encode 0.40
119
+ eval "use ".USE_ENCODE." qw(is_utf8);";
120
+ require MIME::Charset::_Compat;
121
+ for my $sub (@ENCODE_SUBS) {
122
+ no strict "refs";
123
+ *{$sub} = \&{"MIME::Charset::_Compat::$sub"}
124
+ unless $sub eq 'is_utf8';
125
+ }
126
+ }
127
+ } else {
128
+ require MIME::Charset::_Compat;
129
+ for my $sub (@ENCODE_SUBS) {
130
+ no strict "refs";
131
+ *{$sub} = \&{"MIME::Charset::_Compat::$sub"};
132
+ }
133
+ }
134
+
135
+ $VERSION = '1.012.2';
136
+
137
+ ######## Private Attributes ########
138
+
139
+ my $DEFAULT_CHARSET = 'US-ASCII';
140
+ my $FALLBACK_CHARSET = 'UTF-8';
141
+
142
+ # This table was initially borrowed from Python email package.
143
+
144
+ my %CHARSETS = (# input header enc body enc output conv
145
+ 'ISO-8859-1' => ['Q', 'Q', undef],
146
+ 'ISO-8859-2' => ['Q', 'Q', undef],
147
+ 'ISO-8859-3' => ['Q', 'Q', undef],
148
+ 'ISO-8859-4' => ['Q', 'Q', undef],
149
+ # ISO-8859-5 is Cyrillic, and not especially used
150
+ # ISO-8859-6 is Arabic, also not particularly used
151
+ # ISO-8859-7 is Greek, 'Q' will not make it readable
152
+ # ISO-8859-8 is Hebrew, 'Q' will not make it readable
153
+ 'ISO-8859-9' => ['Q', 'Q', undef],
154
+ 'ISO-8859-10' => ['Q', 'Q', undef],
155
+ # ISO-8859-11 is Thai, 'Q' will not make it readable
156
+ 'ISO-8859-13' => ['Q', 'Q', undef],
157
+ 'ISO-8859-14' => ['Q', 'Q', undef],
158
+ 'ISO-8859-15' => ['Q', 'Q', undef],
159
+ 'ISO-8859-16' => ['Q', 'Q', undef],
160
+ 'WINDOWS-1252' => ['Q', 'Q', undef],
161
+ 'VISCII' => ['Q', 'Q', undef],
162
+ 'US-ASCII' => [undef, undef, undef],
163
+ 'BIG5' => ['B', 'B', undef],
164
+ 'GB2312' => ['B', 'B', undef],
165
+ 'HZ-GB-2312' => ['B', undef, undef],
166
+ 'EUC-JP' => ['B', undef, 'ISO-2022-JP'],
167
+ 'SHIFT_JIS' => ['B', undef, 'ISO-2022-JP'],
168
+ 'ISO-2022-JP' => ['B', undef, undef],
169
+ 'ISO-2022-JP-1' => ['B', undef, undef],
170
+ 'ISO-2022-JP-2' => ['B', undef, undef],
171
+ 'EUC-JISX0213' => ['B', undef, 'ISO-2022-JP-3'],
172
+ 'SHIFT_JISX0213' => ['B', undef, 'ISO-2022-JP-3'],
173
+ 'ISO-2022-JP-3' => ['B', undef, undef],
174
+ 'EUC-JIS-2004' => ['B', undef, 'ISO-2022-JP-2004'],
175
+ 'SHIFT_JIS-2004' => ['B', undef, 'ISO-2022-JP-2004'],
176
+ 'ISO-2022-JP-2004' => ['B', undef, undef],
177
+ 'KOI8-R' => ['B', 'B', undef],
178
+ 'TIS-620' => ['B', 'B', undef], # cf. Mew
179
+ 'UTF-16' => ['B', 'B', undef],
180
+ 'UTF-16BE' => ['B', 'B', undef],
181
+ 'UTF-16LE' => ['B', 'B', undef],
182
+ 'UTF-32' => ['B', 'B', undef],
183
+ 'UTF-32BE' => ['B', 'B', undef],
184
+ 'UTF-32LE' => ['B', 'B', undef],
185
+ 'UTF-7' => ['Q', undef, undef],
186
+ 'UTF-8' => ['S', 'S', undef],
187
+ 'GSM03.38' => [undef, undef, undef], # not for MIME
188
+ # We're making this one up to represent raw unencoded 8bit
189
+ '8BIT' => [undef, 'B', 'ISO-8859-1'],
190
+ );
191
+
192
+ # Fix some unexpected or unpreferred names returned by
193
+ # Encode::resolve_alias() or used by somebodies else.
194
+ my %CHARSET_ALIASES = (# unpreferred preferred
195
+ "ASCII" => "US-ASCII",
196
+ "BIG5-ETEN" => "BIG5",
197
+ "CP1250" => "WINDOWS-1250",
198
+ "CP1251" => "WINDOWS-1251",
199
+ "CP1252" => "WINDOWS-1252",
200
+ "CP1253" => "WINDOWS-1253",
201
+ "CP1254" => "WINDOWS-1254",
202
+ "CP1255" => "WINDOWS-1255",
203
+ "CP1256" => "WINDOWS-1256",
204
+ "CP1257" => "WINDOWS-1257",
205
+ "CP1258" => "WINDOWS-1258",
206
+ "CP874" => "WINDOWS-874",
207
+ "CP936" => "GBK",
208
+ "CP949" => "KS_C_5601-1987",
209
+ "EUC-CN" => "GB2312",
210
+ "HZ" => "HZ-GB-2312", # RFC 1842
211
+ "KS_C_5601" => "KS_C_5601-1987",
212
+ "SHIFTJIS" => "SHIFT_JIS",
213
+ "SHIFTJISX0213" => "SHIFT_JISX0213",
214
+ "TIS620" => "TIS-620", # IANA MIBenum 2259
215
+ "UNICODE-1-1-UTF-7" => "UTF-7", # RFC 1642 (obs.)
216
+ "UTF8" => "UTF-8",
217
+ "UTF-8-STRICT" => "UTF-8", # Perl internal use
218
+ "GSM0338" => "GSM03.38", # not for MIME
219
+ );
220
+
221
+ # Some vendors encode characters beyond standardized mappings using extended
222
+ # encoders. Some other standard encoders need additional encode modules.
223
+ my %ENCODERS = (
224
+ 'EXTENDED' => {
225
+ 'ISO-8859-1' => [['cp1252'], ], # Encode::Byte
226
+ 'ISO-8859-2' => [['cp1250'], ], # Encode::Byte
227
+ 'ISO-8859-5' => [['cp1251'], ], # Encode::Byte
228
+ 'ISO-8859-6' => [
229
+ ['cp1256'], # Encode::Byte
230
+ # ['cp1006'], # ditto, for Farsi
231
+ ],
232
+ 'ISO-8859-6-I'=>[['cp1256'], ], # ditto
233
+ 'ISO-8859-7' => [['cp1253'], ], # Encode::Byte
234
+ 'ISO-8859-8' => [['cp1255'], ], # Encode::Byte
235
+ 'ISO-8859-8-I'=>[['cp1255'], ], # ditto
236
+ 'ISO-8859-9' => [['cp1254'], ], # Encode::Byte
237
+ 'ISO-8859-13'=> [['cp1257'], ], # Encode::Byte
238
+ 'GB2312' => [
239
+ ['gb18030', 'Encode::HanExtra'],
240
+ ['cp936'], # Encode::CN
241
+ ],
242
+ 'EUC-JP' => [
243
+ ['eucJP-ascii', 'Encode::EUCJPASCII'],
244
+ # ['cp51932', 'Encode::EUCJPMS'],
245
+ ],
246
+ 'ISO-2022-JP'=> [
247
+ ['x-iso2022jp-ascii',
248
+ 'Encode::EUCJPASCII'],
249
+ # ['iso-2022-jp-ms','Encode::ISO2022JPMS'],
250
+ # ['cp50220', 'Encode::EUCJPMS'],
251
+ # ['cp50221', 'Encode::EUCJPMS'],
252
+ ['iso-2022-jp-1'], # Encode::JP (note*)
253
+ ],
254
+ 'SHIFT_JIS' => [
255
+ ['cp932'], # Encode::JP
256
+ ],
257
+ 'EUC-JISX0213' => [['euc-jis-2004', 'Encode::JISX0213'], ],
258
+ 'ISO-2022-JP-3' => [['iso-2022-jp-2004', 'Encode::JISX0213'], ],
259
+ 'SHIFT_JISX0213'=> [['shift_jis-2004', 'Encode::ShiftJIS2004'], ],
260
+ 'EUC-KR' => [['cp949'], ], # Encode::KR
261
+ 'BIG5' => [
262
+ # ['big5plus', 'Encode::HanExtra'],
263
+ # ['big5-2003', 'Encode::HanExtra'],
264
+ ['cp950'], # Encode::TW
265
+ # ['big5-1984', 'Encode::HanExtra'],
266
+ ],
267
+ 'TIS-620' => [['cp874'], ], # Encode::Byte
268
+ 'UTF-8' => [['utf8'], ], # Special name on Perl
269
+ },
270
+ 'STANDARD' => {
271
+ 'ISO-8859-6-E' => [['iso-8859-6'],],# Encode::Byte
272
+ 'ISO-8859-6-I' => [['iso-8859-6'],],# ditto
273
+ 'ISO-8859-8-E' => [['iso-8859-8'],],# Encode::Byte
274
+ 'ISO-8859-8-I' => [['iso-8859-8'],],# ditto
275
+ 'GB18030' => [['gb18030', 'Encode::HanExtra'], ],
276
+ 'ISO-2022-JP-2' => [['iso-2022-jp-2','Encode::ISO2022JP2'], ],
277
+ 'EUC-JISX0213' => [['euc-jisx0213', 'Encode::JISX0213'], ],
278
+ 'ISO-2022-JP-3' => [['iso-2022-jp-3', 'Encode::JISX0213'], ],
279
+ 'EUC-JIS-2004' => [['euc-jis-2004', 'Encode::JISX0213'], ],
280
+ 'ISO-2022-JP-2004' => [['iso-2022-jp-2004', 'Encode::JISX0213'], ],
281
+ 'SHIFT_JIS-2004'=> [['shift_jis-2004', 'Encode::ShiftJIS2004'], ],
282
+ 'EUC-TW' => [['euc-tw', 'Encode::HanExtra'], ],
283
+ 'HZ-GB-2312' => [['hz'], ], # Encode::CN
284
+ 'TIS-620' => [['tis620'], ], # (note*)
285
+ 'UTF-16' => [['x-utf16auto', 'MIME::Charset::UTF'],],
286
+ 'UTF-32' => [['x-utf32auto', 'MIME::Charset::UTF'],],
287
+ 'GSM03.38' => [['gsm0338'], ], # Encode::GSM0338
288
+
289
+ # (note*) ISO-8859-11 was not registered by IANA.
290
+ # L<Encode> treats it as canonical name of ``tis-?620''.
291
+ },
292
+ );
293
+
294
+ # ISO-2022-* escape sequences etc. to detect charset from unencoded data.
295
+ my @ESCAPE_SEQS = (
296
+ # ISO-2022-* sequences
297
+ # escape seq, possible charset
298
+ # Following sequences are commonly used.
299
+ ["\033\$\@", "ISO-2022-JP"], # RFC 1468
300
+ ["\033\$B", "ISO-2022-JP"], # ditto
301
+ ["\033(J", "ISO-2022-JP"], # ditto
302
+ ["\033(I", "ISO-2022-JP"], # ditto (nonstandard)
303
+ ["\033\$(D", "ISO-2022-JP"], # RFC 2237 (note*)
304
+ # Following sequences are less commonly used.
305
+ ["\033.A", "ISO-2022-JP-2"], # RFC 1554
306
+ ["\033.F", "ISO-2022-JP-2"], # ditto
307
+ ["\033\$(C", "ISO-2022-JP-2"], # ditto
308
+ ["\033\$(O", "ISO-2022-JP-3"], # JIS X 0213:2000
309
+ ["\033\$(P", "ISO-2022-JP-2004"], # JIS X 0213:2000/2004
310
+ ["\033\$(Q", "ISO-2022-JP-2004"], # JIS X 0213:2004
311
+ ["\033\$)C", "ISO-2022-KR"], # RFC 1557
312
+ ["\033\$)A", "ISO-2022-CN"], # RFC 1922
313
+ ["\033\$A", "ISO-2022-CN"], # ditto (nonstandard)
314
+ ["\033\$)G", "ISO-2022-CN"], # ditto
315
+ ["\033\$*H", "ISO-2022-CN"], # ditto
316
+ # Other sequences will be used with appropriate charset
317
+ # parameters, or hardly used.
318
+
319
+ # note*: This RFC defines ISO-2022-JP-1, superset of
320
+ # ISO-2022-JP. But that charset name is rarely used.
321
+ # OTOH many of encoders for ISO-2022-JP recognize this
322
+ # sequence so that comatibility with EUC-JP will be
323
+ # guaranteed.
324
+
325
+ # Singlebyte 7-bit sequences
326
+ # escape seq, possible charset
327
+ ["\033e", "GSM03.38"], # ESTI GSM 03.38 (note*)
328
+ ["\033\012", "GSM03.38"], # ditto
329
+ ["\033<", "GSM03.38"], # ditto
330
+ ["\033/", "GSM03.38"], # ditto
331
+ ["\033>", "GSM03.38"], # ditto
332
+ ["\033\024", "GSM03.38"], # ditto
333
+ ["\033(", "GSM03.38"], # ditto
334
+ ["\033\@", "GSM03.38"], # ditto
335
+ ["\033)", "GSM03.38"], # ditto
336
+ ["\033=", "GSM03.38"], # ditto
337
+
338
+ # note*: This is not used for MIME message.
339
+ );
340
+
341
+ ######## Public Configuration Attributes ########
342
+
343
+ $Config = {
344
+ Detect7bit => 'YES',
345
+ Mapping => 'EXTENDED',
346
+ Replacement => 'DEFAULT',
347
+ };
348
+ local @INC = @INC;
349
+ pop @INC if $INC[-1] eq '.';
350
+ eval { require MIME::Charset::Defaults; };
351
+
352
+ ######## Private Constants ########
353
+
354
+ my $NON7BITRE = qr{
355
+ [^\x01-\x7e]
356
+ }x;
357
+
358
+ my $NONASCIIRE = qr{
359
+ [^\x09\x0a\x0d\x20\x21-\x7e]
360
+ }x;
361
+
362
+ my $ISO2022RE = qr{
363
+ ISO-2022-.+
364
+ }ix;
365
+
366
+ my $ASCIITRANSRE = qr{
367
+ HZ-GB-2312 | UTF-7
368
+ }ix;
369
+
370
+
371
+ ######## Public Functions ########
372
+
373
+ =head2 Constructor
374
+
375
+ =over
376
+
377
+ =item $charset = MIME::Charset->new([CHARSET [, OPTS]])
378
+
379
+ Create charset object.
380
+
381
+ OPTS may accept following key-value pair.
382
+ B<NOTE>:
383
+ When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
384
+ conversion will not be performed. So this option do not have any effects.
385
+
386
+ =over 4
387
+
388
+ =item Mapping => MAPTYPE
389
+
390
+ Whether to extend mappings actually used for charset names or not.
391
+ C<"EXTENDED"> uses extended mappings.
392
+ C<"STANDARD"> uses standardized strict mappings.
393
+ Default is C<"EXTENDED">.
394
+
395
+ =back
396
+
397
+ =cut
398
+
399
+ sub new {
400
+ my $class = shift;
401
+ my $charset = shift;
402
+ return bless {}, $class unless $charset;
403
+ return bless {}, $class if 75 < length $charset; # w/a for CPAN RT #65796.
404
+ my %params = @_;
405
+ my $mapping = uc($params{'Mapping'} || $Config->{Mapping});
406
+
407
+ if ($charset =~ /\bhz.?gb.?2312$/i) {
408
+ # workaround: "HZ-GB-2312" mistakenly treated as "EUC-CN" by Encode
409
+ # (2.12).
410
+ $charset = "HZ-GB-2312";
411
+ } elsif ($charset =~ /\btis-?620$/i) {
412
+ # workaround: "TIS620" treated as ISO-8859-11 by Encode.
413
+ # And "TIS-620" not known by some versions of Encode (cf.
414
+ # CPAN RT #20781).
415
+ $charset = "TIS-620";
416
+ } else {
417
+ $charset = resolve_alias($charset) || $charset
418
+ }
419
+ $charset = $CHARSET_ALIASES{uc($charset)} || uc($charset);
420
+ my ($henc, $benc, $outcset);
421
+ my $spec = $CHARSETS{$charset};
422
+ if ($spec) {
423
+ ($henc, $benc, $outcset) =
424
+ ($$spec[0], $$spec[1], USE_ENCODE? $$spec[2]: undef);
425
+ } else {
426
+ ($henc, $benc, $outcset) = ('S', 'B', undef);
427
+ }
428
+ my ($decoder, $encoder);
429
+ if (USE_ENCODE) {
430
+ $decoder = _find_encoder($charset, $mapping);
431
+ $encoder = _find_encoder($outcset, $mapping);
432
+ } else {
433
+ $decoder = $encoder = undef;
434
+ }
435
+
436
+ bless {
437
+ InputCharset => $charset,
438
+ Decoder => $decoder,
439
+ HeaderEncoding => $henc,
440
+ BodyEncoding => $benc,
441
+ OutputCharset => ($outcset || $charset),
442
+ Encoder => ($encoder || $decoder),
443
+ }, $class;
444
+ }
445
+
446
+ my %encoder_cache = ();
447
+
448
+ sub _find_encoder($$) {
449
+ my $charset = uc(shift || "");
450
+ return undef unless $charset;
451
+ my $mapping = uc(shift);
452
+ my ($spec, $name, $module, $encoder);
453
+
454
+ local($@);
455
+ $encoder = $encoder_cache{$charset, $mapping};
456
+ return $encoder if ref $encoder;
457
+
458
+ foreach my $m (('EXTENDED', 'STANDARD')) {
459
+ next if $m eq 'EXTENDED' and $mapping ne 'EXTENDED';
460
+ $spec = $ENCODERS{$m}->{$charset};
461
+ next unless $spec;
462
+ foreach my $s (@{$spec}) {
463
+ ($name, $module) = @{$s};
464
+ if ($module) {
465
+ next unless eval "require $module;";
466
+ }
467
+ $encoder = Encode::find_encoding($name);
468
+ last if ref $encoder;
469
+ }
470
+ last if ref $encoder;
471
+ }
472
+ $encoder ||= Encode::find_encoding($charset);
473
+ $encoder_cache{$charset, $mapping} = $encoder if $encoder;
474
+ return $encoder;
475
+ }
476
+
477
+ =back
478
+
479
+ =head2 Getting Information of Charsets
480
+
481
+ =over
482
+
483
+ =item $charset->body_encoding
484
+
485
+ =item body_encoding CHARSET
486
+
487
+ Get recommended transfer-encoding of CHARSET for message body.
488
+
489
+ Returned value will be one of C<"B"> (BASE64), C<"Q"> (QUOTED-PRINTABLE),
490
+ C<"S"> (shorter one of either) or
491
+ C<undef> (might not be transfer-encoded; either 7BIT or 8BIT). This may
492
+ not be same as encoding for message header.
493
+
494
+ =cut
495
+
496
+ sub body_encoding($) {
497
+ my $self = shift;
498
+ return undef unless $self;
499
+ $self = __PACKAGE__->new($self) unless ref $self;
500
+ $self->{BodyEncoding};
501
+ }
502
+
503
+ =item $charset->as_string
504
+
505
+ =item canonical_charset CHARSET
506
+
507
+ Get canonical name for charset.
508
+
509
+ =cut
510
+
511
+ sub canonical_charset($) {
512
+ my $self = shift;
513
+ return undef unless $self;
514
+ $self = __PACKAGE__->new($self) unless ref $self;
515
+ $self->{InputCharset};
516
+ }
517
+
518
+ sub as_string($) {
519
+ my $self = shift;
520
+ $self->{InputCharset};
521
+ }
522
+
523
+ =item $charset->decoder
524
+
525
+ Get L<"Encode::Encoding"> object to decode strings to Unicode by charset.
526
+ If charset is not specified or not known by this module,
527
+ undef will be returned.
528
+
529
+ =cut
530
+
531
+ sub decoder($) {
532
+ my $self = shift;
533
+ $self->{Decoder};
534
+ }
535
+
536
+ =item $charset->dup
537
+
538
+ Get a copy of charset object.
539
+
540
+ =cut
541
+
542
+ sub dup($) {
543
+ my $self = shift;
544
+ my $obj = __PACKAGE__->new(undef);
545
+ %{$obj} = %{$self};
546
+ $obj;
547
+ }
548
+
549
+ =item $charset->encoder([CHARSET])
550
+
551
+ Get L<"Encode::Encoding"> object to encode Unicode string using compatible
552
+ charset recommended to be used for messages on Internet.
553
+
554
+ If optional CHARSET is specified, replace encoder (and output charset
555
+ name) of $charset object with those of CHARSET, therefore,
556
+ $charset object will be a converter between original charset and
557
+ new CHARSET.
558
+
559
+ =cut
560
+
561
+ sub encoder($$;) {
562
+ my $self = shift;
563
+ my $charset = shift;
564
+ if ($charset) {
565
+ $charset = __PACKAGE__->new($charset) unless ref $charset;
566
+ $self->{OutputCharset} = $charset->{InputCharset};
567
+ $self->{Encoder} = $charset->{Decoder};
568
+ $self->{BodyEncoding} = $charset->{BodyEncoding};
569
+ $self->{HeaderEncoding} = $charset->{HeaderEncoding};
570
+ }
571
+ $self->{Encoder};
572
+ }
573
+
574
+ =item $charset->header_encoding
575
+
576
+ =item header_encoding CHARSET
577
+
578
+ Get recommended encoding scheme of CHARSET for message header.
579
+
580
+ Returned value will be one of C<"B">, C<"Q">, C<"S"> (shorter one of either)
581
+ or C<undef> (might not be encoded). This may not be same as encoding
582
+ for message body.
583
+
584
+ =cut
585
+
586
+ sub header_encoding($) {
587
+ my $self = shift;
588
+ return undef unless $self;
589
+ $self = __PACKAGE__->new($self) unless ref $self;
590
+ $self->{HeaderEncoding};
591
+ }
592
+
593
+ =item $charset->output_charset
594
+
595
+ =item output_charset CHARSET
596
+
597
+ Get a charset which is compatible with given CHARSET and is recommended
598
+ to be used for MIME messages on Internet (if it is known by this module).
599
+
600
+ When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
601
+ this function will simply
602
+ return the result of L<"canonical_charset">.
603
+
604
+ =cut
605
+
606
+ sub output_charset($) {
607
+ my $self = shift;
608
+ return undef unless $self;
609
+ $self = __PACKAGE__->new($self) unless ref $self;
610
+ $self->{OutputCharset};
611
+ }
612
+
613
+ =back
614
+
615
+ =head2 Translating Text Data
616
+
617
+ =over
618
+
619
+ =item $charset->body_encode(STRING [, OPTS])
620
+
621
+ =item body_encode STRING, CHARSET [, OPTS]
622
+
623
+ Get converted (if needed) data of STRING and recommended transfer-encoding
624
+ of that data for message body. CHARSET is the charset by which STRING
625
+ is encoded.
626
+
627
+ OPTS may accept following key-value pairs.
628
+ B<NOTE>:
629
+ When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
630
+ conversion will not be performed. So these options do not have any effects.
631
+
632
+ =over 4
633
+
634
+ =item Detect7bit => YESNO
635
+
636
+ Try auto-detecting 7-bit charset when CHARSET is not given.
637
+ Default is C<"YES">.
638
+
639
+ =item Replacement => REPLACEMENT
640
+
641
+ Specifies error handling scheme. See L<"Error Handling">.
642
+
643
+ =back
644
+
645
+ 3-item list of (I<converted string>, I<charset for output>,
646
+ I<transfer-encoding>) will be returned.
647
+ I<Transfer-encoding> will be either C<"BASE64">, C<"QUOTED-PRINTABLE">,
648
+ C<"7BIT"> or C<"8BIT">. If I<charset for output> could not be determined
649
+ and I<converted string> contains non-ASCII byte(s), I<charset for output> will
650
+ be C<undef> and I<transfer-encoding> will be C<"BASE64">.
651
+ I<Charset for output> will be C<"US-ASCII"> if and only if string does not
652
+ contain any non-ASCII bytes.
653
+
654
+ =cut
655
+
656
+ sub body_encode {
657
+ my $self = shift;
658
+ my $text;
659
+ if (ref $self) {
660
+ $text = shift;
661
+ } else {
662
+ $text = $self;
663
+ $self = __PACKAGE__->new(shift);
664
+ }
665
+ my ($encoded, $charset) = $self->_text_encode($text, @_);
666
+ return ($encoded, undef, 'BASE64')
667
+ unless $charset and $charset->{InputCharset};
668
+ my $cset = $charset->{OutputCharset};
669
+
670
+ # Determine transfer-encoding.
671
+ my $enc = $charset->{BodyEncoding};
672
+
673
+ if (!$enc and $encoded !~ /\x00/) { # Eliminate hostile NUL character.
674
+ if ($encoded =~ $NON7BITRE) { # String contains 8bit char(s).
675
+ $enc = '8BIT';
676
+ } elsif ($cset =~ /^($ISO2022RE|$ASCIITRANSRE)$/) { # 7BIT.
677
+ $enc = '7BIT';
678
+ } else { # Pure ASCII.
679
+ $enc = '7BIT';
680
+ $cset = 'US-ASCII';
681
+ }
682
+ } elsif ($enc eq 'S') {
683
+ $enc = _resolve_S($encoded, 1);
684
+ } elsif ($enc eq 'B') {
685
+ $enc = 'BASE64';
686
+ } elsif ($enc eq 'Q') {
687
+ $enc = 'QUOTED-PRINTABLE';
688
+ } else {
689
+ $enc = 'BASE64';
690
+ }
691
+ return ($encoded, $cset, $enc);
692
+ }
693
+
694
+ =item $charset->decode(STRING [,CHECK])
695
+
696
+ Decode STRING to Unicode.
697
+
698
+ B<Note>:
699
+ When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
700
+ this function will die.
701
+
702
+ =cut
703
+
704
+ sub decode($$$;) {
705
+ my $self = shift;
706
+ my $s = shift;
707
+ my $check = shift || 0;
708
+ $self->{Decoder}->decode($s, $check);
709
+ }
710
+
711
+ =item detect_7bit_charset STRING
712
+
713
+ Guess 7-bit charset that may encode a string STRING.
714
+ If STRING contains any 8-bit bytes, C<undef> will be returned.
715
+ Otherwise, Default Charset will be returned for unknown charset.
716
+
717
+ =cut
718
+
719
+ sub detect_7bit_charset($) {
720
+ return $DEFAULT_CHARSET unless &USE_ENCODE;
721
+ my $s = shift;
722
+ return $DEFAULT_CHARSET unless $s;
723
+
724
+ # Non-7bit string
725
+ return undef if $s =~ $NON7BITRE;
726
+
727
+ # Try to detect 7-bit escape sequences.
728
+ foreach (@ESCAPE_SEQS) {
729
+ my ($seq, $cset) = @$_;
730
+ if (index($s, $seq) >= 0) {
731
+ my $decoder = __PACKAGE__->new($cset);
732
+ next unless $decoder->{Decoder};
733
+ eval {
734
+ my $dummy = $s;
735
+ $decoder->decode($dummy, FB_CROAK());
736
+ };
737
+ if ($@) {
738
+ next;
739
+ }
740
+ return $decoder->{InputCharset};
741
+ }
742
+ }
743
+
744
+ # How about HZ, VIQR, UTF-7, ...?
745
+
746
+ return $DEFAULT_CHARSET;
747
+ }
748
+
749
+ sub _detect_7bit_charset {
750
+ detect_7bit_charset(@_);
751
+ }
752
+
753
+ =item $charset->encode(STRING [, CHECK])
754
+
755
+ Encode STRING (Unicode or non-Unicode) using compatible charset recommended
756
+ to be used for messages on Internet (if this module knows it).
757
+ Note that string will be decoded to Unicode then encoded even if compatible charset
758
+ was equal to original charset.
759
+
760
+ B<Note>:
761
+ When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
762
+ this function will die.
763
+
764
+ =cut
765
+
766
+ sub encode($$$;) {
767
+ my $self = shift;
768
+ my $s = shift;
769
+ my $check = shift || 0;
770
+
771
+ unless (is_utf8($s) or $s =~ /[^\x00-\xFF]/) {
772
+ $s = $self->{Decoder}->decode($s, ($check & 0x1)? FB_CROAK(): 0);
773
+ }
774
+ my $enc = $self->{Encoder}->encode($s, $check);
775
+ Encode::_utf8_off($enc) if is_utf8($enc); # workaround for RT #35120
776
+ $enc;
777
+ }
778
+
779
+ =item $charset->encoded_header_len(STRING [, ENCODING])
780
+
781
+ =item encoded_header_len STRING, ENCODING, CHARSET
782
+
783
+ Get length of encoded STRING for message header
784
+ (without folding).
785
+
786
+ ENCODING may be one of C<"B">, C<"Q"> or C<"S"> (shorter
787
+ one of either C<"B"> or C<"Q">).
788
+
789
+ =cut
790
+
791
+ sub encoded_header_len($$$;) {
792
+ my $self = shift;
793
+ my ($encoding, $s);
794
+ if (ref $self) {
795
+ $s = shift;
796
+ $encoding = uc(shift || $self->{HeaderEncoding});
797
+ } else {
798
+ $s = $self;
799
+ $encoding = uc(shift);
800
+ $self = shift;
801
+ $self = __PACKAGE__->new($self) unless ref $self;
802
+ }
803
+
804
+ #FIXME:$encoding === undef
805
+
806
+ my $enclen;
807
+ if ($encoding eq 'Q') {
808
+ $enclen = _enclen_Q($s);
809
+ } elsif ($encoding eq 'S' and _resolve_S($s) eq 'Q') {
810
+ $enclen = _enclen_Q($s);
811
+ } else { # "B"
812
+ $enclen = _enclen_B($s);
813
+ }
814
+
815
+ length($self->{OutputCharset})+$enclen+7;
816
+ }
817
+
818
+ sub _enclen_B($) {
819
+ int((length(shift) + 2) / 3) * 4;
820
+ }
821
+
822
+ sub _enclen_Q($;$) {
823
+ my $s = shift;
824
+ my $in_body = shift;
825
+ my @o;
826
+ if ($in_body) {
827
+ @o = ($s =~ m{([^-\t\r\n !*+/0-9A-Za-z])}go);
828
+ } else {
829
+ @o = ($s =~ m{([^- !*+/0-9A-Za-z])}gos);
830
+ }
831
+ length($s) + scalar(@o) * 2;
832
+ }
833
+
834
+ sub _resolve_S($;$) {
835
+ my $s = shift;
836
+ my $in_body = shift;
837
+ my $e;
838
+ if ($in_body) {
839
+ $e = scalar(() = $s =~ m{[^-\t\r\n !*+/0-9A-Za-z]}g);
840
+ return (length($s) + 8 < $e * 6) ? 'BASE64' : 'QUOTED-PRINTABLE';
841
+ } else {
842
+ $e = scalar(() = $s =~ m{[^- !*+/0-9A-Za-z]}g);
843
+ return (length($s) + 8 < $e * 6) ? 'B' : 'Q';
844
+ }
845
+ }
846
+
847
+ =item $charset->header_encode(STRING [, OPTS])
848
+
849
+ =item header_encode STRING, CHARSET [, OPTS]
850
+
851
+ Get converted (if needed) data of STRING and recommended encoding scheme of
852
+ that data for message headers. CHARSET is the charset by which STRING
853
+ is encoded.
854
+
855
+ OPTS may accept following key-value pairs.
856
+ B<NOTE>:
857
+ When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
858
+ conversion will not be performed. So these options do not have any effects.
859
+
860
+ =over 4
861
+
862
+ =item Detect7bit => YESNO
863
+
864
+ Try auto-detecting 7-bit charset when CHARSET is not given.
865
+ Default is C<"YES">.
866
+
867
+ =item Replacement => REPLACEMENT
868
+
869
+ Specifies error handling scheme. See L<"Error Handling">.
870
+
871
+ =back
872
+
873
+ 3-item list of (I<converted string>, I<charset for output>,
874
+ I<encoding scheme>) will be returned. I<Encoding scheme> will be
875
+ either C<"B">, C<"Q"> or C<undef> (might not be encoded).
876
+ If I<charset for output> could not be determined and I<converted string>
877
+ contains non-ASCII byte(s), I<charset for output> will be C<"8BIT">
878
+ (this is I<not> charset name but a special value to represent unencodable
879
+ data) and I<encoding scheme> will be C<undef> (should not be encoded).
880
+ I<Charset for output> will be C<"US-ASCII"> if and only if string does not
881
+ contain any non-ASCII bytes.
882
+
883
+ =cut
884
+
885
+ sub header_encode {
886
+ my $self = shift;
887
+ my $text;
888
+ if (ref $self) {
889
+ $text = shift;
890
+ } else {
891
+ $text = $self;
892
+ $self = __PACKAGE__->new(shift);
893
+ }
894
+ my ($encoded, $charset) = $self->_text_encode($text, @_);
895
+ return ($encoded, '8BIT', undef)
896
+ unless $charset and $charset->{InputCharset};
897
+ my $cset = $charset->{OutputCharset};
898
+
899
+ # Determine encoding scheme.
900
+ my $enc = $charset->{HeaderEncoding};
901
+
902
+ if (!$enc and $encoded !~ $NON7BITRE) {
903
+ unless ($cset =~ /^($ISO2022RE|$ASCIITRANSRE)$/) { # 7BIT.
904
+ $cset = 'US-ASCII';
905
+ }
906
+ } elsif ($enc eq 'S') {
907
+ $enc = _resolve_S($encoded);
908
+ } elsif ($enc !~ /^[BQ]$/) {
909
+ $enc = 'B';
910
+ }
911
+ return ($encoded, $cset, $enc);
912
+ }
913
+
914
+ sub _text_encode {
915
+ my $charset = shift;
916
+ my $s = shift;
917
+ my %params = @_;
918
+ my $replacement = uc($params{'Replacement'} || $Config->{Replacement});
919
+ my $detect7bit = uc($params{'Detect7bit'} || $Config->{Detect7bit});
920
+ my $encoding = $params{'Encoding'} ||
921
+ (exists $params{'Encoding'}? undef: 'A'); # undocumented
922
+
923
+ if (!$encoding or $encoding ne 'A') { # no 7-bit auto-detection
924
+ $detect7bit = 'NO';
925
+ }
926
+ unless ($charset->{InputCharset}) {
927
+ if ($s =~ $NON7BITRE) {
928
+ return ($s, undef);
929
+ } elsif ($detect7bit ne "NO") {
930
+ $charset = __PACKAGE__->new(&detect_7bit_charset($s));
931
+ } else {
932
+ $charset = __PACKAGE__->new($DEFAULT_CHARSET,
933
+ Mapping => 'STANDARD');
934
+ }
935
+ }
936
+ if (!$encoding or $encoding ne 'A') { # no conversion
937
+ $charset = $charset->dup;
938
+ $charset->encoder($charset);
939
+ $charset->{HeaderEncoding} = $encoding;
940
+ $charset->{BodyEncoding} = $encoding;
941
+ }
942
+ my $check = ($replacement and $replacement =~ /^\d+$/)?
943
+ $replacement:
944
+ {
945
+ 'CROAK' => FB_CROAK(),
946
+ 'STRICT' => FB_CROAK(),
947
+ 'FALLBACK' => FB_CROAK(), # special
948
+ 'PERLQQ' => FB_PERLQQ(),
949
+ 'HTMLCREF' => FB_HTMLCREF(),
950
+ 'XMLCREF' => FB_XMLCREF(),
951
+ }->{$replacement || ""} || 0;
952
+
953
+ # Encode data by output charset if required. If failed, fallback to
954
+ # fallback charset.
955
+ my $encoded;
956
+ if (is_utf8($s) or $s =~ /[^\x00-\xFF]/ or
957
+ ($charset->{InputCharset} || "") ne ($charset->{OutputCharset} || "")) {
958
+ if ($check & 0x1) { # CROAK or FALLBACK
959
+ eval {
960
+ $encoded = $s;
961
+ $encoded = $charset->encode($encoded, FB_CROAK());
962
+ };
963
+ if ($@) {
964
+ if ($replacement eq "FALLBACK" and $FALLBACK_CHARSET) {
965
+ my $cset = __PACKAGE__->new($FALLBACK_CHARSET,
966
+ Mapping => 'STANDARD');
967
+ # croak unknown charset
968
+ croak "unknown charset ``$FALLBACK_CHARSET''"
969
+ unless $cset->{Decoder};
970
+ # charset translation
971
+ $charset = $charset->dup;
972
+ $charset->encoder($cset);
973
+ $encoded = $s;
974
+ $encoded = $charset->encode($encoded, 0);
975
+ # replace input & output charsets with fallback charset
976
+ $cset->encoder($cset);
977
+ $charset = $cset;
978
+ } else {
979
+ $@ =~ s/ at .+$//;
980
+ croak $@;
981
+ }
982
+ }
983
+ } else {
984
+ $encoded = $s;
985
+ $encoded = $charset->encode($encoded, $check);
986
+ }
987
+ } else {
988
+ $encoded = $s;
989
+ }
990
+
991
+ if ($encoded !~ /$NONASCIIRE/) { # maybe ASCII
992
+ # check ``ASCII transformation'' charsets
993
+ if ($charset->{OutputCharset} =~ /^($ASCIITRANSRE)$/) {
994
+ my $u = $encoded;
995
+ if (USE_ENCODE) {
996
+ $u = $charset->encoder->decode($encoded); # dec. by output
997
+ } elsif ($encoded =~ /[+~]/) { # workaround for pre-Encode env.
998
+ $u = "x$u";
999
+ }
1000
+ if ($u eq $encoded) {
1001
+ $charset = $charset->dup;
1002
+ $charset->encoder($DEFAULT_CHARSET);
1003
+ }
1004
+ } elsif ($charset->{OutputCharset} ne "US-ASCII") {
1005
+ $charset = $charset->dup;
1006
+ $charset->encoder($DEFAULT_CHARSET);
1007
+ }
1008
+ }
1009
+
1010
+ return ($encoded, $charset);
1011
+ }
1012
+
1013
+ =item $charset->undecode(STRING [,CHECK])
1014
+
1015
+ Encode Unicode string STRING to byte string by input charset of $charset.
1016
+ This is equivalent to C<$charset-E<gt>decoder-E<gt>encode()>.
1017
+
1018
+ B<Note>:
1019
+ When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
1020
+ this function will die.
1021
+
1022
+ =cut
1023
+
1024
+ sub undecode($$$;) {
1025
+ my $self = shift;
1026
+ my $s = shift;
1027
+ my $check = shift || 0;
1028
+ my $enc = $self->{Decoder}->encode($s, $check);
1029
+ Encode::_utf8_off($enc); # workaround for RT #35120
1030
+ $enc;
1031
+ }
1032
+
1033
+ =back
1034
+
1035
+ =head2 Manipulating Module Defaults
1036
+
1037
+ =over
1038
+
1039
+ =item alias ALIAS [, CHARSET]
1040
+
1041
+ Get/set charset alias for canonical names determined by
1042
+ L<"canonical_charset">.
1043
+
1044
+ If CHARSET is given and isn't false, ALIAS will be assigned as an alias of
1045
+ CHARSET. Otherwise, alias won't be changed. In both cases,
1046
+ current charset name that ALIAS is assigned will be returned.
1047
+
1048
+ =cut
1049
+
1050
+ sub alias ($;$) {
1051
+ my $alias = uc(shift);
1052
+ my $charset = uc(shift);
1053
+
1054
+ return $CHARSET_ALIASES{$alias} unless $charset;
1055
+
1056
+ $CHARSET_ALIASES{$alias} = $charset;
1057
+ return $charset;
1058
+ }
1059
+
1060
+ =item default [CHARSET]
1061
+
1062
+ Get/set default charset.
1063
+
1064
+ B<Default charset> is used by this module when charset context is
1065
+ unknown. Modules using this module are recommended to use this
1066
+ charset when charset context is unknown or implicit default is
1067
+ expected. By default, it is C<"US-ASCII">.
1068
+
1069
+ If CHARSET is given and isn't false, it will be set to default charset.
1070
+ Otherwise, default charset won't be changed. In both cases,
1071
+ current default charset will be returned.
1072
+
1073
+ B<NOTE>: Default charset I<should not> be changed.
1074
+
1075
+ =cut
1076
+
1077
+ sub default(;$) {
1078
+ my $charset = &canonical_charset(shift);
1079
+
1080
+ if ($charset) {
1081
+ croak "Unknown charset '$charset'"
1082
+ unless resolve_alias($charset);
1083
+ $DEFAULT_CHARSET = $charset;
1084
+ }
1085
+ return $DEFAULT_CHARSET;
1086
+ }
1087
+
1088
+ =item fallback [CHARSET]
1089
+
1090
+ Get/set fallback charset.
1091
+
1092
+ B<Fallback charset> is used by this module when conversion by given
1093
+ charset is failed and C<"FALLBACK"> error handling scheme is specified.
1094
+ Modules using this module may use this charset as last resort of charset
1095
+ for conversion. By default, it is C<"UTF-8">.
1096
+
1097
+ If CHARSET is given and isn't false, it will be set to fallback charset.
1098
+ If CHARSET is C<"NONE">, fallback charset will be undefined.
1099
+ Otherwise, fallback charset won't be changed. In any cases,
1100
+ current fallback charset will be returned.
1101
+
1102
+ B<NOTE>: It I<is> useful that C<"US-ASCII"> is specified as fallback charset,
1103
+ since result of conversion will be readable without charset information.
1104
+
1105
+ =cut
1106
+
1107
+ sub fallback(;$) {
1108
+ my $charset = &canonical_charset(shift);
1109
+
1110
+ if ($charset eq "NONE") {
1111
+ $FALLBACK_CHARSET = undef;
1112
+ } elsif ($charset) {
1113
+ croak "Unknown charset '$charset'"
1114
+ unless resolve_alias($charset);
1115
+ $FALLBACK_CHARSET = $charset;
1116
+ }
1117
+ return $FALLBACK_CHARSET;
1118
+ }
1119
+
1120
+ =item recommended CHARSET [, HEADERENC, BODYENC [, ENCCHARSET]]
1121
+
1122
+ Get/set charset profiles.
1123
+
1124
+ If optional arguments are given and any of them are not false, profiles
1125
+ for CHARSET will be set by those arguments. Otherwise, profiles
1126
+ won't be changed. In both cases, current profiles for CHARSET will be
1127
+ returned as 3-item list of (HEADERENC, BODYENC, ENCCHARSET).
1128
+
1129
+ HEADERENC is recommended encoding scheme for message header.
1130
+ It may be one of C<"B">, C<"Q">, C<"S"> (shorter one of either) or
1131
+ C<undef> (might not be encoded).
1132
+
1133
+ BODYENC is recommended transfer-encoding for message body. It may be
1134
+ one of C<"B">, C<"Q">, C<"S"> (shorter one of either) or
1135
+ C<undef> (might not be transfer-encoded).
1136
+
1137
+ ENCCHARSET is a charset which is compatible with given CHARSET and
1138
+ is recommended to be used for MIME messages on Internet.
1139
+ If conversion is not needed (or this module doesn't know appropriate
1140
+ charset), ENCCHARSET is C<undef>.
1141
+
1142
+ B<NOTE>: This function in the future releases can accept more optional
1143
+ arguments (for example, properties to handle character widths, line folding
1144
+ behavior, ...). So format of returned value may probably be changed.
1145
+ Use L<"header_encoding">, L<"body_encoding"> or L<"output_charset"> to get
1146
+ particular profile.
1147
+
1148
+ =cut
1149
+
1150
+ sub recommended ($;$;$;$) {
1151
+ my $charset = &canonical_charset(shift);
1152
+ my $henc = uc(shift) || undef;
1153
+ my $benc = uc(shift) || undef;
1154
+ my $cset = &canonical_charset(shift);
1155
+
1156
+ croak "CHARSET is not specified" unless $charset;
1157
+ croak "Unknown header encoding" unless !$henc or $henc =~ /^[BQS]$/;
1158
+ croak "Unknown body encoding" unless !$benc or $benc =~ /^[BQ]$/;
1159
+
1160
+ if ($henc or $benc or $cset) {
1161
+ $cset = undef if $charset eq $cset;
1162
+ my @spec = ($henc, $benc, USE_ENCODE? $cset: undef);
1163
+ $CHARSETS{$charset} = \@spec;
1164
+ return @spec;
1165
+ } else {
1166
+ $charset = __PACKAGE__->new($charset) unless ref $charset;
1167
+ return map { $charset->{$_} } qw(HeaderEncoding BodyEncoding
1168
+ OutputCharset);
1169
+ }
1170
+ }
1171
+
1172
+ =back
1173
+
1174
+ =head2 Constants
1175
+
1176
+ =over
1177
+
1178
+ =item USE_ENCODE
1179
+
1180
+ Unicode/multibyte support flag.
1181
+ Non-empty string will be set when Unicode and multibyte support is enabled.
1182
+ Currently, this flag will be non-empty on Perl 5.7.3 or later and
1183
+ empty string on earlier versions of Perl.
1184
+
1185
+ =back
1186
+
1187
+ =head2 Error Handling
1188
+
1189
+ L<"body_encode"> and L<"header_encode"> accept following C<Replacement>
1190
+ options:
1191
+
1192
+ =over
1193
+
1194
+ =item C<"DEFAULT">
1195
+
1196
+ Put a substitution character in place of a malformed character.
1197
+ For UCM-based encodings, <subchar> will be used.
1198
+
1199
+ =item C<"FALLBACK">
1200
+
1201
+ Try C<"DEFAULT"> scheme using I<fallback charset> (see L<"fallback">).
1202
+ When fallback charset is undefined and conversion causes error,
1203
+ code will die on error with an error message.
1204
+
1205
+ =item C<"CROAK">
1206
+
1207
+ Code will die on error immediately with an error message.
1208
+ Therefore, you should trap the fatal error with eval{} unless you
1209
+ really want to let it die on error.
1210
+ Synonym is C<"STRICT">.
1211
+
1212
+ =item C<"PERLQQ">
1213
+
1214
+ =item C<"HTMLCREF">
1215
+
1216
+ =item C<"XMLCREF">
1217
+
1218
+ Use C<FB_PERLQQ>, C<FB_HTMLCREF> or C<FB_XMLCREF>
1219
+ scheme defined by L<Encode> module.
1220
+
1221
+ =item numeric values
1222
+
1223
+ Numeric values are also allowed.
1224
+ For more details see L<Encode/Handling Malformed Data>.
1225
+
1226
+ =back
1227
+
1228
+ If error handling scheme is not specified or unknown scheme is specified,
1229
+ C<"DEFAULT"> will be assumed.
1230
+
1231
+ =head2 Configuration File
1232
+
1233
+ Built-in defaults for option parameters can be overridden by configuration
1234
+ file: F<MIME/Charset/Defaults.pm>.
1235
+ For more details read F<MIME/Charset/Defaults.pm.sample>.
1236
+
1237
+ =head1 VERSION
1238
+
1239
+ Consult $VERSION variable.
1240
+
1241
+ Development versions of this module may be found at
1242
+ L<http://hatuka.nezumi.nu/repos/MIME-Charset/>.
1243
+
1244
+ =head2 Incompatible Changes
1245
+
1246
+ =over 4
1247
+
1248
+ =item Release 1.001
1249
+
1250
+ =over 4
1251
+
1252
+ =item *
1253
+
1254
+ new() method returns an object when CHARSET argument is not specified.
1255
+
1256
+ =back
1257
+
1258
+ =item Release 1.005
1259
+
1260
+ =over 4
1261
+
1262
+ =item *
1263
+
1264
+ Restrict characters in encoded-word according to RFC 2047 section 5 (3).
1265
+ This also affects return value of encoded_header_len() method.
1266
+
1267
+ =back
1268
+
1269
+ =item Release 1.008.2
1270
+
1271
+ =over 4
1272
+
1273
+ =item *
1274
+
1275
+ body_encoding() method may also returns C<"S">.
1276
+
1277
+ =item *
1278
+
1279
+ Return value of body_encode() method for UTF-8 may include
1280
+ C<"QUOTED-PRINTABLE"> encoding item that in earlier versions was fixed to
1281
+ C<"BASE64">.
1282
+
1283
+ =back
1284
+
1285
+ =back
1286
+
1287
+ =head1 SEE ALSO
1288
+
1289
+ Multipurpose Internet Mail Extensions (MIME).
1290
+
1291
+ =head1 AUTHOR
1292
+
1293
+ Hatuka*nezumi - IKEDA Soji <hatuka(at)nezumi.nu>
1294
+
1295
+ =head1 COPYRIGHT
1296
+
1297
+ Copyright (C) 2006-2017 Hatuka*nezumi - IKEDA Soji.
1298
+ This program is free software; you can redistribute it and/or modify it
1299
+ under the same terms as Perl itself.
1300
+
1301
+ =cut
1302
+
1303
+ 1;