RubyGems - rabbit-slide-tommy-rubyencoding - Versions diffs - 1.0.0 - Mend

rabbit-slide-tommy-rubyencoding 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

data/.rabbit +1 -0
data/6752.png +0 -0
data/README.rd +24 -0
data/Rakefile +10 -0
data/RubyEncoding.rab +485 -0
data/blog.png +0 -0
data/config.yaml +16 -0
data/icon.jpg +0 -0
data/icon2.jpg +0 -0
data/pdf/rubyencoding-RubyEncoding.pdf +0 -0
metadata +71 -0

data/.rabbit ADDED Viewed

@@ -0,0 +1 @@
+RubyEncoding.rab

data/6752.png ADDED Viewed

Binary file

data/README.rd ADDED Viewed

@@ -0,0 +1,24 @@
+= 本当はこわいエンコーディングの話
+東京Ruby会議10 #tkrk10 で発表したスライド
+== 作者向け
+=== 表示
+  rake
+=== 公開
+  rake publish
+== 閲覧者向け
+=== インストール
+  gem install rabbit-slide-tommy-rubyencoding
+=== 表示
+  rabbit rabbit-slide-tommy-rubyencoding.gem

data/Rakefile ADDED Viewed

@@ -0,0 +1,10 @@
+require "rabbit/task/slide"
+# Edit ./config.yaml to customize meta data
+Rabbit::Task::Slide.new do |task|
+  # task.spec.licenses = ["CC BY-SA 3.0"]
+  # task.spec.files += Dir.glob("doc/**/*.*")
+  # task.spec.files -= Dir.glob("private/**/*.*")
+  # task.spec.add_runtime_dependency("YOUR THEME")
+end

data/RubyEncoding.rab ADDED Viewed

@@ -0,0 +1,485 @@
+= 本当は((*こわい*))\nエンコーディングの話
+# : subtitle
+#    サブタイトル
+: author
+  とみたまさひろ
+# : institution
+#    所属
+: content-source
+  東京Ruby会議10
+: date
+  2013-01-13
+: allotted-time
+  15m
+: theme
+  clear-blue
+= 自己紹介
+    # image
+    # src = icon.jpg
+    # relative-height = 20
+    # align = right
+    # relative-margin-top = -30
+    # relative-margin-right = -10
+  * とみた まさひろ
+    * http://((*tmtms*)).hatenablog.com
+    * https://twitter.com/((*tmtms*))
+  * 好きなもの/環境
+    * ((*Ruby*)), Rabbit, MySQL, Emacs, Git, Ubuntu, ThinkPad
+  * 所属など
+    * 長野県北部在住 / 某社プログラマー / 日本MySQLユーザ会 / 長野ソフトウェア技術者グループ(NSEG)
+= エンコーディング
+= エンコーディングとは
+  * 文字符号化方式
+  * 文字をどのようなバイト列で表現するか
+  * UTF-8 とか EUC-JP とか SHIFT_JIS とかそーゆー奴
+  * 「charset」とか呼ばれたりする
+  * 「文字コード」とか呼ばれたりする
+= 同じバイト列でも別の文字
+((*0xC2 0xA9*)) の2バイトは
+* UTF-8 では「((*©*))」1文字
+* EUC-JP では「((*息*))」1文字
+* SHIFT_JIS では「((*ﾂｩ*))」2文字
+= Ruby 1.8
+* ((*"\xC2\xA9"*)) という文字列は Ruby 的にはただのバイト列
+* エンコーディング情報を持たない
+* "((*©*))"(UTF-8) として扱うか "((*息*))"(EUC-JP) として扱うかはプログラム次第
+* 正規表現にはエンコーディングあり
+  * /〜/n  /〜/s  /〜/u  /〜/e
+= Ruby 1.9
+* 文字列のエンコーディングは文字列自身が知っている
+* "((*©*))"(UTF-8) と "((*息*))"(EUC-JP) は同じバイト列だけど異なる文字列
+* "((*あ*))"(UTF-8) と "((*あ*))"(EUC-JP) は同じ文字を表してるけど等しくない
+* 同じプログラム中で複数のエンコーディングの文字列を同時に扱える(珍しいかも)
+* 正規表現にもエンコーディングあり
+= エンコーディング一覧(('tag:small:(1.9.3)'))
+(('tag:center'))Ruby 自身が持ってるので環境に依存しない
+    ASCII-8BIT Big5 Big5-HKSCS Big5-UAO CP50220 CP50221 CP51932 CP850
+    CP852 CP855 CP949 CP950 CP951 EUC-JP EUC-KR EUC-TW Emacs-Mule
+    GB12345 GB18030 GB1988 GB2312 GBK IBM437 IBM737 IBM775 IBM852
+    IBM855 IBM857 IBM860 IBM861 IBM862 IBM863 IBM864 IBM865 IBM866
+    IBM869 ISO-2022-JP ISO-2022-JP-2 ISO-2022-JP-KDDI ISO-8859-1
+    ISO-8859-10 ISO-8859-11 ISO-8859-13 ISO-8859-14 ISO-8859-15
+    ISO-8859-16 ISO-8859-2 ISO-8859-3 ISO-8859-4 ISO-8859-5 ISO-8859-6
+    ISO-8859-7 ISO-8859-8 ISO-8859-9 KOI8-R KOI8-U MacJapanese
+    SJIS-DoCoMo SJIS-KDDI SJIS-SoftBank Shift_JIS TIS-620 US-ASCII
+    UTF-16 UTF-16BE UTF-16LE UTF-32 UTF-32BE UTF-32LE UTF-7 UTF-8
+    UTF8-DoCoMo UTF8-KDDI UTF8-MAC UTF8-SoftBank Windows-1250
+    Windows-1251 Windows-1252 Windows-1253 Windows-1254 Windows-1255
+    Windows-1256 Windows-1257 Windows-1258 Windows-31J Windows-874
+    eucJP-ms macCentEuro macCroatian macCyrillic macGreek macIceland
+    macRoman macRomania macThai macTurkish macUkraine
+    stateless-ISO-2022-JP stateless-ISO-2022-JP-KDDI
+= うれしいこと
+= 1.8では((*バイト*))単位
+    # coderay ruby
+    "あいう".size     #=> 9
+    "あいう".bytesize #=> 9
+    "あいう".chars{|c| ... }
+    #=> "\xE3","\x81","\x82", ...
+    "あいう"[0]       #=> 0xE3
+    "あいう".reverse
+    #=> "\x86\x81\xE3\x84\x81\xE3\x82\x81\xE3"
+= 1.9では((*文字*))単位
+    # coderay ruby
+    "あいう".size     #=> 3
+    "あいう".bytesize #=> 9
+    "あいう".chars{|c| ... }
+    #=> "あ", "い", "う"
+    "あいう"[0]       #=> "あ"
+    "あいう".reverse  #=> "ういあ"
+= エンコーディング変換
+    # coderay ruby
+    # -*- coding: utf-8 -*-
+    s = "あいう"
+    #=> "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86"
+    s.encoding    #=> #<Encoding:UTF-8>
+    s2 = s.encode("CP932")
+    #=> "\x82\xA0\x82\xA2\x82\xA4"
+    s2.encoding   #=> #<Encoding:Windows-31J>
+= IOで変換してくれる
+    # coderay ruby
+    File.open("cp932.txt", "r:CP932").read
+    #=> CP932 文字列
+    File.open("cp932.txt", "r:CP932:UTF-8").read
+    #=> UTF-8 文字列
+    File.open("cp932.txt").read
+    #=> 環境依存
+= うれしいことばかりじゃない
+= 変換((*先*))にない文字
+    # coderay ruby
+    # -*- coding: utf-8 -*-
+    "あ♥".encode("CP932")
+    #=> Encoding::UndefinedConversionError
+= 変換((*元*))にない文字
+    # coderay ruby
+    # -*- coding: utf-8 -*-
+    "あ\xFF".encode("CP932")
+    #=> Encoding::InvalidByteSequenceError
+= エンコーディングがあっても\n変換できるとは限らない
+    # coderay ruby
+    # -*- coding: utf-8 -*-
+    "あいう".encode("UTF-7")
+    #=> Encoding::ConverterNotFoundError
+= エンコーディングの((*不一致*))
+    # coderay ruby
+    utf8 = "あいう"
+    cp932 = "あ".encode("CP932")
+    utf8.start_with?(cp932)
+    #=> Encoding::CompatibilityError
+= 文字列と正規表現の\nエンコーディングの((*不一致*))
+    # coderay ruby
+    utf8 = "あいう"
+    re = /./s
+    utf8 =~ re
+    #=> Encoding::CompatibilityError
+= エンコーディングが同じでも\n((*不正*))な文字を含んでいる
+    # coderay ruby
+    utf8 = "あ\xFF"
+    utf8 =~ /./
+    #=> invalid byte sequence in UTF-8
+    #   (ArgumentError)
+= IO
+= メソッドによって\nエンコーディングが異なる
+* テキスト読み込み（エンコードあり）
+  * IO#gets
+  * IO#getc
+  * IO#lines
+  * IO#read 等
+* バイナリ読み込み（ASCII-8BIT固定）
+  * IO#read(n)
+  * IO#sysread 等
+= IO#read
+* IO#read(size) は ((*ASCII-8BIT*))
+* IO#read() は((*外部エンコーディング*))依存
+* 引数の有無によって結果のエンコーディングが異なる！
+* なにそれ((*こわい*))
+= 外部エンコーディング
+* ファイル自身は自分の内容のエンコーディングを((*知らない*))
+* ファイルから読み込んだ文字列の Ruby 内でのエンコーディングは何らかの方法で指定する必要がある
+= 引数で指定
+    # coderay ruby
+    File.open(filename, "r:UTF-8")
+    File.read(filename, :encoding=>"UTF-8")
+= 環境変数
+引数で指定されてない場合は環境変数が参照される
+* LC_ALL
+* LC_CTYPE
+* LANG
+= 環境変数による違い
+    # coderay text
+    % cat utf-8.txt
+    あいうえお
+    % export LC_ALL=C
+    % ruby -e 'p File.read("utf-8.txt").size'
+    16
+    % export LC_ALL=ja_JP.UTF-8
+    % ruby -e 'p File.read("utf-8.txt").size'
+    6
+(('tag:center'))環境変数によって動きが変わっちゃう！((*こわい*))
+= 入力時にはエラーにならない
+    # coderay ruby
+    utf8 = File.read("utf8.txt", :encoding=>"UTF-8")
+    # 実は UTF-8 として不正な文字が含まれていて
+      〜〜〜〜〜〜
+    # ずっと後で別のメソッドでエラーになったり
+    utf8 =~ /./
+    #=> invalid byte sequence in UTF-8 (ArgumentError)
+= CGI
+    # coderay ruby
+    require "cgi"
+    cgi = CGI.new
+不正な文字のパラメータを渡すとエラー
+    GET http://example.com/hoge.cgi?fuga=%FF
+    #=> Accept-Charset encoding error (CGI::InvalidEncoding)
+= Rails
+不正な文字のパラメータを渡すとエラー
+    POST http://example.com/posts
+    post[title]=%FF
+    #=> ArgumentError (invalid byte sequence in UTF-8)
+= エラーになりすぎ((*こわい*))!
+= 対処
+= 変換((*先*))にない文字を置換
+    # coderay ruby
+    "あ♥".encode("CP932")
+    #=> Encoding::UndefinedConversionError
+    "あ♥".encode("CP932", :undef=>:replace)
+    #=> "あ?"
+= 変換((*元*))にない文字を置換
+    # coderay ruby
+    "あ\xFF".encode("CP932")
+    #=> Encoding::InvalidByteSequenceError
+    "あ\xFF".encode("CP932", :invalid=>:replace)
+    #=> "あ?"
+= 置換文字の指定
+    # coderay ruby
+    "あ♥".encode("CP932", :undef=>:replace, :replace=>"〓")
+    #=> CP932 で "あ〓"
+= そもそも変換が必要になるようなことをしないのが((*吉*))
+= UTF-8に((*統一*))すれば\nたいていは問題ない
+= UTF-8に統一したつもりでも\n他のエンコーディングが現れることも
+    # coderay ruby
+    File.open(filename, "r:UTF-8").read
+    #=> UTF-8 文字列
+    File.open(filename).read
+    #=> 環境依存
+= いちいち引数で指定する？
+= デフォルト値を指定する
+プログラムで使用するファイルのエンコーディングがすべて同一であれば
+    # coderay ruby
+    Encoding.default_external = "UTF-8"
+    File.read(filename)   #=> UTF-8文字列
+= これで問題ない？
+= ASCII-8BIT
+= メソッドによっては ASCII-8BIT
+    # coderay ruby
+    f = File.open(filename, "r:UTF-8")
+    f.gets        #=> UTF-8
+    f.read(10)    #=> ASCII-8BIT
+= ソケットは ASCII-8BIT
+    # coderay ruby
+    require 'socket'
+    Encoding.default_external = "UTF-8"
+    TCPSocket.new('127.0.0.1', 25).gets
+      #=> ASCII-8BIT
+= ((*気をつける*))しかない
+= ((*不正*))な文字
+= エンコーディングがUTF-8でも\nデータがUTF-8とは限らない
+    # coderay ruby
+    f = File.open("/dev/urandom", "r:UTF-8")
+    str = f.gets
+    str.encoding   #=> #<Encoding:UTF-8>
+    str =~ /./
+    #=> invalid byte sequence in UTF-8 (ArgumentError)
+= 事前に判定
+String#valid_encoding?
+    # coderay ruby
+    f = File.open("/dev/urandom", "r:UTF-8")
+    str = f.gets
+    str.valid_encoding?  #=> false
+= 不正な文字を((*置換したい*))
+* 簡単な方法は((*ない*))
+* String#encode は変換元と変換先が同じ場合は((*何もしない*))
+    # coderay ruby
+    # -*- coding: utf-8 -*-
+    "あ\xFF".encode("CP932", :invalid=>:replace)
+      #=> "あ?"
+    "あ\xFF".encode("UTF-8", :invalid=>:replace)
+      #=> "あ\xFF"
+= Iconv ではできたけど…
+    # coderay ruby
+    require "iconv"
+    src = "あい\xFFうえ"
+    dst = ""
+    iconv = Iconv.new("utf-8", "utf-8")
+    begin
+      dst.concat iconv.iconv(src)
+    rescue Iconv::IllegalSequence => e
+      dst.concat e.success
+      dst.concat "〓"
+      src = e.failed[1..-1]
+      retry
+    end
+    dst   #=> "あい〓うえ"
+(('tag:center'))((*Iconv は 2.0 で廃止*))
+= 同じ文字群を持つ\n別のエンコーディングを経由
+    # coderay ruby
+    "あ\xFF".
+      encode("UTF-16", :invalid=>:replace).
+      encode("UTF-8")
+    #=> "あ�"
+= 2.0.xだとできるようになる？
+    # image
+    # src = 6752.png
+    # relative-height = 100
+= CGI や Rails
+= パラメータが不正な文字で\nエラー
+* 放置でいいんじゃない？
+* 外部からの不正なデータは最前線でエラーになってくれた方がありがたかったり
+* 後の処理に渡されても扱いに困るだけ
+* 実害はログが鬱陶しいくらい？
+= CGIで頑張るなら
+    # coderay ruby
+    require 'cgi'
+    # 一旦 ASCII-8BIT で受けて
+    cgi = CGI.new :accept_charset=>"ASCII-8BIT"
+    # パラメータ毎にどうにかする
+    hoge = cgi['hoge'].encode("UTF-8", "UTF-8")
+    unless hoge.valid_encoding?
+      ...
+= Railsは...
+(('tag:center'))よくわかりません (><)
+= おまけ
+= ASCII文字だけなら\n異なるエンコーディングでも\nエラーにならない
+    # coderay ruby
+    # -*- coding: utf-8 -*-
+    "あ" =~ /./s  #=> Encoding::CompatibilityError
+    "ABC" =~ /./s #=> エラーにならない
+ASCII文字だけでテストしてると本番でエラーになったり((*こわい*))
+= まとめ
+* 内部のエンコーディングは UTF-8 に統一しよう
+* IO 読み込みで ASCII-8BIT になってることがある
+* 外部からのデータは不正な文字が入ってることがある
+(('tag:center'))((*ちゃんと気をつければ(('tag:x-small:そんなに'))こわくない*))

data/blog.png ADDED Viewed

Binary file

data/config.yaml ADDED Viewed

@@ -0,0 +1,16 @@
+---
+id: rubyencoding
+base_name: RubyEncoding
+tags: []
+presentation_date:
+version: 1.0.0
+licenses: []
+slideshare_id:
+speaker_deck_id:
+author:
+  markup_language: :rd
+  name: TOMITA Masahiro
+  email: tommy@tmtm.org
+  rubygems_user: tommy
+  slideshare_user: tmtm
+  speaker_deck_user:

data/icon.jpg ADDED Viewed

Binary file

data/icon2.jpg ADDED Viewed

Binary file

data/pdf/rubyencoding-RubyEncoding.pdf ADDED Viewed

Binary file

metadata ADDED Viewed

@@ -0,0 +1,71 @@
+--- !ruby/object:Gem::Specification
+name: rabbit-slide-tommy-rubyencoding
+version: !ruby/object:Gem::Version
+  version: 1.0.0
+  prerelease:
+platform: ruby
+authors:
+- TOMITA Masahiro
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-01-13 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rabbit
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 2.0.2
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 2.0.2
+description: ! '東京Ruby会議10 #tkrk10 で発表したスライド'
+email:
+- tommy@tmtm.org
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .rabbit
+- config.yaml
+- Rakefile
+- README.rd
+- 6752.png
+- blog.png
+- icon2.jpg
+- icon.jpg
+- RubyEncoding.rab
+- pdf/rubyencoding-RubyEncoding.pdf
+homepage: http://slide.rabbit-shocker.org/authors/tommy/rubyencoding/
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.23
+signing_key:
+specification_version: 3
+summary: 本当はこわいエンコーディングの話
+test_files: []