active_record_encoding 0.10.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +21 -0
- data/lib/active_record_encoding.rb +242 -0
- metadata +65 -0
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2009, Michael H. Buselli
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
* Redistributions of source code must retain the above copyright
|
7
|
+
notice, this list of conditions and the following disclaimer.
|
8
|
+
* Redistributions in binary form must reproduce the above copyright
|
9
|
+
notice, this list of conditions and the following disclaimer in the
|
10
|
+
documentation and/or other materials provided with the distribution.
|
11
|
+
|
12
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ''AS IS'' AND ANY
|
13
|
+
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
14
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
15
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
|
16
|
+
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
17
|
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
18
|
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
19
|
+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
20
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
21
|
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@@ -0,0 +1,242 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2009, Michael H. Buselli
|
3
|
+
# See LICENSE for details. All other rights reserved.
|
4
|
+
#
|
5
|
+
#######
|
6
|
+
|
7
|
+
#
|
8
|
+
# ActiveRecordEncoding — Module to make ActiveRecord aware of Unicode
|
9
|
+
# encoding issues. It should be used only if the underlying database
|
10
|
+
# and its driver do not or cannot properly handle the encoding of the
|
11
|
+
# data it returns (usually as "ASCII-8BIT"). Most databases can
|
12
|
+
# properly encode data, however, so your first assumption should be that
|
13
|
+
# you do not need this software unless you really know you need it.
|
14
|
+
#
|
15
|
+
# ActiveRecordEncoding keeps a variable for each column and table
|
16
|
+
# where encoding is requested so it knows how the data is encoded in the
|
17
|
+
# database. This variable is called its external_encoding.
|
18
|
+
#
|
19
|
+
# External encodings must be defined for each column or table where
|
20
|
+
# a translation is to occur, and this is done in the model definition:
|
21
|
+
#
|
22
|
+
# class User < ActiveRecord::Base
|
23
|
+
# external_encoding 'ISO-8859-1', :for => :comment
|
24
|
+
# external_encoding 'ISO-8859-1', :for => [:first_name, :last_name]
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
# Data is converted to UTF-8 when passed to the user.
|
28
|
+
#
|
29
|
+
# When data is being saved back to the database, it is assumed that the
|
30
|
+
# data is in UTF-8, or on Ruby 1.9 if the String's encoding value is
|
31
|
+
# something other than "ASCII-8BIT" it uses that encoding.
|
32
|
+
#
|
33
|
+
module ActiveRecordEncoding
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
# Use Iconv if String objects don't know about #encoding. This is so
|
38
|
+
# the library functions on Ruby 1.8.
|
39
|
+
if not ''.respond_to? :encoding
|
40
|
+
require 'iconv'
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
#
|
45
|
+
# StandardClassMethods defines class methods for inclusion in
|
46
|
+
# ActiveRecord::Base in order to provide the user interface for
|
47
|
+
# ActiveRecordEncoding.
|
48
|
+
#
|
49
|
+
module ActiveRecordEncoding::StandardClassMethods
|
50
|
+
|
51
|
+
#
# Declare the external encoding for this model class, i.e. the encoding
# the data is assumed to be in when it is retrieved from the database.
#
#   class User < ActiveRecord::Base
#     external_encoding 'ISO-8859-1'    # affect all binary columns
#   end
#
# Pass the :for option to restrict the declaration to one or more
# specific columns:
#
#   class User < ActiveRecord::Base
#     external_encoding 'ISO-8859-1', :for => :comment
#     external_encoding 'ISO-8859-1', :for => [:first_name, :last_name]
#   end
#
def external_encoding(new_encoding, options = {})
  # Pull the supporting class and instance methods into this model.
  extend ActiveRecordEncoding::ExtendedClassMethods
  include ActiveRecordEncoding::IncludedInstanceMethods

  columns = options[:for]

  # Without :for the encoding becomes the model-wide fallback.
  return @active_record_external_encoding = new_encoding unless columns

  # Column names are always stored as Strings.
  [*columns].each do |column|
    @active_record_encodings[column.to_s][:ext] = new_encoding
  end
end


# With the removal of #internal_encoding, the #encoding method is now
# identical to #external_encoding.
alias encoding external_encoding
|
86
|
+
|
87
|
+
end # ActiveRecordEncoding::StandardClassMethods
|
88
|
+
|
89
|
+
|
90
|
+
#
|
91
|
+
# ExtendedClassMethods defines class methods for inclusion in
|
92
|
+
# models sub-classed from ActiveRecord::Base to do the dirty work. It
|
93
|
+
# is only included in models that use ActiveRecordEncoding.
|
94
|
+
#
|
95
|
+
module ActiveRecordEncoding::ExtendedClassMethods
|
96
|
+
|
97
|
+
# Return the external encoding that applies to +attr_name+: the encoding
# declared for that column if there is one, otherwise the model-wide
# encoding, otherwise +nil+.
#
# +attr_name+ may be a String or a Symbol; it is normalized with #to_s
# because #external_encoding stores column names as Strings.
#
# When this class has no encoding table of its own, the nearest
# superclass table is adopted (and remembered).  Raises a RuntimeError
# if no table can be found anywhere in the superclass chain.
def active_record_external_encoding(attr_name = nil) #:nodoc:
  if @active_record_encodings.nil?
    @active_record_encodings =
      active_record_encoding_inherited_ivar(:@active_record_encodings)

    if @active_record_encodings.nil?
      raise "internal error -- can't find encodings"
    end
  end

  # Hash#fetch bypasses the table's default block, so merely asking about
  # a column does not create an empty entry for it.
  column_settings = @active_record_encodings.fetch(attr_name.to_s, nil)

  (column_settings && column_settings[:ext]) ||
    @active_record_external_encoding ||
    active_record_encoding_inherited_ivar(:@active_record_external_encoding)
end

# Walk up the superclass chain and return the first non-nil value of the
# class-level instance variable +ivar_name+, or +nil+ if no superclass
# has it set.
def active_record_encoding_inherited_ivar(ivar_name) #:nodoc:
  klass = superclass
  while klass
    found = klass.instance_variable_get(ivar_name)
    return found unless found.nil?
    klass = klass.superclass
  end
  nil
end
|
113
|
+
|
114
|
+
|
115
|
+
# Replacement for define_read_method.  It first lets the original
# implementation define the reader, then -- only when an external
# encoding applies to the attribute -- wraps that reader so its result
# is passed through encoding_aware_attribute_cast!.
def encoding_aware_define_read_method(symbol, attr_name, column) #:nodoc:
  pre_encoding_aware_define_read_method(symbol, attr_name, column)
  return if active_record_external_encoding(attr_name).nil?

  wrapper  = :"encoding_aware_attr_#{symbol}"
  original = :"pre_#{wrapper}"

  # Body of the generated wrapper: call the saved reader and cast its value.
  body = "encoding_aware_attribute_cast!(#{attr_name.inspect}, #{original})\n"
  evaluate_attribute_method attr_name, "def #{wrapper}; #{body}; end"

  # Keep the untouched reader under the pre_ name, then route the public
  # reader through the wrapper.
  alias_method original, symbol
  alias_method symbol, wrapper
end
|
128
|
+
|
129
|
+
end # ActiveRecordEncoding::ExtendedClassMethods
|
130
|
+
|
131
|
+
|
132
|
+
#
|
133
|
+
# IncludedInstanceMethods defines instance methods for inclusion in
|
134
|
+
# models sub-classed from ActiveRecord::Base to do the dirty work. It
|
135
|
+
# is only included in models that use ActiveRecordEncoding.
|
136
|
+
#
|
137
|
+
module ActiveRecordEncoding::IncludedInstanceMethods
|
138
|
+
|
139
|
+
# Hook run each time this module is included in a model class.
#
# It gives +model_class+ its own table of per-column encodings and, the
# first time a class hierarchy is touched, reroutes define_read_method,
# read_attribute and write_attribute through their encoding aware
# replacements (the originals stay reachable under pre_encoding_aware_*).
#
# Ruby invokes this hook even when the module is already present in the
# ancestor chain.  When a superclass has been set up before, the aliases
# must therefore NOT be created again: doing so would point the
# pre_encoding_aware_* names at the already wrapped methods and make them
# call themselves forever.  In that case the subclass only receives a
# copy of the inherited column encodings.
def self.included(model_class) #:nodoc:
  return if model_class.instance_variable_get(:@active_record_encodings)

  # Look for an encoding table on the nearest superclass that has one.
  inherited = nil
  ancestor = model_class.superclass
  while ancestor && inherited.nil?
    inherited = ancestor.instance_variable_get(:@active_record_encodings)
    ancestor = ancestor.superclass
  end

  # Each class owns its table; inherited entries are copied so changes in
  # the subclass do not leak back into the superclass.
  encodings = Hash.new { |h, k| h[k] = Hash.new }
  inherited.each { |name, settings| encodings[name] = settings.dup } if inherited
  model_class.instance_variable_set(:@active_record_encodings, encodings)

  # A superclass with a table has already installed the aliases.
  return if inherited

  class << model_class
    alias_method :pre_encoding_aware_define_read_method, :define_read_method
    alias_method :define_read_method, :encoding_aware_define_read_method
  end

  model_class.class_eval do
    alias_method :pre_encoding_aware_read_attribute, :read_attribute
    alias_method :read_attribute, :encoding_aware_read_attribute
    alias_method :pre_encoding_aware_write_attribute, :write_attribute
    alias_method :write_attribute, :encoding_aware_write_attribute
  end
end
|
155
|
+
|
156
|
+
# Cast binary data into Unicode, if necessary.
#
# Values that are frozen, or that already carry the
# +@active_record_encoded+ marker, are returned untouched.
#
# On ":read" operations the value is converted from the external
# encoding to UTF-8, and the conversion happens to the value in place.
# On ":write" operations the value is tagged as 'UTF-8' if its encoding
# is "ASCII-8BIT", but the data is not converted, and the operation
# happens on a duplicate object.  Any other operation raises.
#
# The (possibly duplicated) value is marked as processed and returned.
def encoding_aware_attribute_cast!(attr_name, value, op = :read) #:nodoc:
  return value if value.frozen?
  return value if value.instance_variable_get(:@active_record_encoded)

  case op
  when :read
    ext_encoding = self.class.active_record_external_encoding(attr_name)
    if ext_encoding
      if value.respond_to? :encoding
        # Ruby 1.9+: relabel the raw bytes, then transcode in place.
        value.force_encoding(ext_encoding).encode!('UTF-8')
      elsif value.respond_to? :mb_chars
        # Ruby 1.8: no String#encoding, so go through Iconv.
        value.replace Iconv.conv('UTF-8', ext_encoding, value)
      end
    end
  when :write
    if value.respond_to? :encoding
      # Work on a copy when the object can be duplicated; otherwise carry
      # on with the object that was passed in.
      begin
        value = value.dup
      rescue StandardError
        nil
      end
      value.force_encoding('UTF-8') if value.encoding.name == 'ASCII-8BIT'
    end
  else
    raise "invalid operation"
  end

  # Mark the value so it is never cast a second time.
  value.instance_variable_set(:@active_record_encoded, true)
  value
end
|
191
|
+
|
192
|
+
# Normal replacement method for read_attribute: fetch the attribute
# through the original implementation and cast the result.
def pure_encoding_aware_read_attribute(attr_name) #:nodoc:
  encoding_aware_attribute_cast!(
    attr_name,
    pre_encoding_aware_read_attribute(attr_name)
  )
end
private :pure_encoding_aware_read_attribute
|
198
|
+
|
199
|
+
|
200
|
+
# Replacement method for read_attribute when Rails is preparing data
# for write.  The attribute is read as usual and, when an external
# encoding applies, a copy converted back to that encoding is returned.
def encoding_aware_read_attribute_for_write(attr_name) #:nodoc:
  value = pure_encoding_aware_read_attribute(attr_name)
  ext_encoding = self.class.active_record_external_encoding(attr_name)
  return value unless ext_encoding

  if value.respond_to? :encoding
    # Ruby 1.9+: transcode, then label the result as raw bytes.
    value.encode(ext_encoding).force_encoding('ASCII-8BIT')
  elsif value.respond_to? :mb_chars
    # Ruby 1.8: convert with Iconv.
    Iconv.conv(ext_encoding, 'UTF-8', value)
  else
    value
  end
end
private :encoding_aware_read_attribute_for_write
|
216
|
+
|
217
|
+
|
218
|
+
def encoding_aware_read_attribute(attr_name) #:nodoc:
  # Rails obtains the values it is about to write out by calling
  # read_attribute from #attributes_with_quotes, so that caller has to
  # be detected and served the write variant.  Inspecting the call
  # stack for this is an unfortunate kludge.
  preparing_write = caller.any? { |frame| frame =~ /`attributes_with_quotes'$/ }

  if preparing_write
    encoding_aware_read_attribute_for_write(attr_name)
  else
    pure_encoding_aware_read_attribute(attr_name)
  end
end
|
228
|
+
|
229
|
+
|
230
|
+
# Replacement for write_attribute.  The incoming value is run through
# the ":write" cast first so that it ends up with
# +@active_record_encoded+ set to +true+.  Otherwise the value would
# later be force_encoded according to the rules defined by the user,
# which results in corrupted data.
def encoding_aware_write_attribute(attr_name, value) #:nodoc:
  prepared = encoding_aware_attribute_cast!(attr_name, value, :write)
  pre_encoding_aware_write_attribute(attr_name, prepared)
end
|
238
|
+
|
239
|
+
end # ActiveRecordEncoding::IncludedInstanceMethods
|
240
|
+
|
241
|
+
|
242
|
+
# Add the StandardClassMethods (external_encoding and its alias, encoding)
# to ActiveRecord::Base as class methods.
ActiveRecord::Base.extend ActiveRecordEncoding::StandardClassMethods
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: active_record_encoding
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 10
|
8
|
+
- 1
|
9
|
+
version: 0.10.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Michael H. Buselli
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-09-11 00:00:00 -05:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: |
|
22
|
+
Library to monkey-patch ActiveRecord and add some Unicode awareness
|
23
|
+
|
24
|
+
email: cosine@cosine.org
|
25
|
+
executables: []
|
26
|
+
|
27
|
+
extensions: []
|
28
|
+
|
29
|
+
extra_rdoc_files: []
|
30
|
+
|
31
|
+
files:
|
32
|
+
- LICENSE
|
33
|
+
- lib/active_record_encoding.rb
|
34
|
+
has_rdoc: true
|
35
|
+
homepage: http://cosine.org/
|
36
|
+
licenses: []
|
37
|
+
|
38
|
+
post_install_message:
|
39
|
+
rdoc_options: []
|
40
|
+
|
41
|
+
require_paths:
|
42
|
+
- lib
|
43
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
segments:
|
48
|
+
- 0
|
49
|
+
version: "0"
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
segments:
|
55
|
+
- 0
|
56
|
+
version: "0"
|
57
|
+
requirements: []
|
58
|
+
|
59
|
+
rubyforge_project: active_record_encoding
|
60
|
+
rubygems_version: 1.3.6
|
61
|
+
signing_key:
|
62
|
+
specification_version: 3
|
63
|
+
summary: Library to monkey-patch ActiveRecord and add some Unicode awareness
|
64
|
+
test_files: []
|
65
|
+
|