active_record_encoding 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +21 -0
- data/lib/active_record_encoding.rb +242 -0
- metadata +65 -0
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2009, Michael H. Buselli
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
* Redistributions of source code must retain the above copyright
|
7
|
+
notice, this list of conditions and the following disclaimer.
|
8
|
+
* Redistributions in binary form must reproduce the above copyright
|
9
|
+
notice, this list of conditions and the following disclaimer in the
|
10
|
+
documentation and/or other materials provided with the distribution.
|
11
|
+
|
12
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ''AS IS'' AND ANY
|
13
|
+
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
14
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
15
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
|
16
|
+
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
17
|
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
18
|
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
19
|
+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
20
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
21
|
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@@ -0,0 +1,242 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2009, Michael H. Buselli
|
3
|
+
# See LICENSE for details. All other rights reserved.
|
4
|
+
#
|
5
|
+
#######
|
6
|
+
|
7
|
+
#
|
8
|
+
# ActiveRecordEncoding — Module to make ActiveRecord aware of Unicode
|
9
|
+
# encoding issues. It should be used only if the underlying database
|
10
|
+
# and its driver do not or cannot properly handle the encoding of the
|
11
|
+
# data it returns (usually as "ASCII-8BIT"). Most databases can
|
12
|
+
# properly encode data, however, so your first assumption should be that
|
13
|
+
# you do not need this software unless you really know you need it.
|
14
|
+
#
|
15
|
+
# ActiveRecordEncoding keeps a variable for each column and table
|
16
|
+
# where encoding is requested so it knows how the data is encoded in the
|
17
|
+
# database. This variable is called its external_encoding.
|
18
|
+
#
|
19
|
+
# External encodings must be defined for each column or table where
|
20
|
+
# a translation is to occur, and this is done in the model definition:
|
21
|
+
#
|
22
|
+
# class User < ActiveRecord::Base
|
23
|
+
# external_encoding 'ISO-8859-1', :for => :comment
|
24
|
+
# external_encoding 'ISO-8859-1', :for => [:first_name, :last_name]
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
# Data is converted to UTF-8 when passed to the user.
|
28
|
+
#
|
29
|
+
# When data is being saved back to the database, it is assumed that the
|
30
|
+
# data is in UTF-8, or on Ruby 1.9 if the String's encoding value is
|
31
|
+
# something other than "ASCII-8BIT" it uses that encoding.
|
32
|
+
#
|
33
|
+
module ActiveRecordEncoding
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
# Use Iconv if String objects don't know about #encoding. This is so
|
38
|
+
# the library functions on Ruby 1.8.
|
39
|
+
if not ''.respond_to? :encoding
|
40
|
+
require 'iconv'
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
#
|
45
|
+
# StandardClassMethods defines class methods for inclusion in
|
46
|
+
# ActiveRecord::Base in order to provide the user interface for
|
47
|
+
# ActiveRecordEncoding.
|
48
|
+
#
|
49
|
+
module ActiveRecordEncoding::StandardClassMethods

  #
  # Declare how this model's data is encoded in the database.
  #
  # Without options the encoding becomes the model-wide setting:
  #
  #   class User < ActiveRecord::Base
  #     external_encoding 'ISO-8859-1'
  #   end
  #
  # With the :for option the encoding is recorded only for the named
  # column(s); a single name or a list of names is accepted:
  #
  #   class User < ActiveRecord::Base
  #     external_encoding 'ISO-8859-1', :for => :comment
  #     external_encoding 'ISO-8859-1', :for => [:first_name, :last_name]
  #   end
  #
  # Calling this also mixes ExtendedClassMethods and
  # IncludedInstanceMethods into the model.
  #
  def external_encoding(new_encoding, options = {})
    extend ActiveRecordEncoding::ExtendedClassMethods
    include ActiveRecordEncoding::IncludedInstanceMethods

    columns = options[:for]

    # No :for option: record the encoding as the model-wide setting.
    return @active_record_external_encoding = new_encoding unless columns

    # Column names are stored as strings.
    [*columns].each { |column| @active_record_encodings[column.to_s][:ext] = new_encoding }
  end

  # #encoding is kept as a second name for #external_encoding.
  alias_method :encoding, :external_encoding

end # ActiveRecordEncoding::StandardClassMethods
|
88
|
+
|
89
|
+
|
90
|
+
#
|
91
|
+
# ExtendedClassMethods defines class methods for inclusion in
|
92
|
+
# models sub-classed from ActiveRecord::Base to do the dirty work. It
|
93
|
+
# is only included in models that use ActiveRecordEncoding.
|
94
|
+
#
|
95
|
+
module ActiveRecordEncoding
  #
  # Class-level helpers for models that declare an external encoding.
  #
  module ExtendedClassMethods

    #
    # Return the external encoding that applies to +attr_name+: the
    # column-specific setting when one is recorded, otherwise the
    # model-wide setting (which may be +nil+).
    #
    # +attr_name+ may be a String or a Symbol; it is normalised with #to_s
    # because column settings are stored under string keys.
    #
    # A class without its own table walks up its superclasses (stopping at
    # ActiveRecord::Base or Object) and caches the first table it finds.
    # The model-wide encoding of that same ancestor is copied at the same
    # moment, so it is a snapshot taken on first lookup.
    #
    # Raises RuntimeError when no table can be found.
    #
    def active_record_external_encoding(attr_name = nil) #:nodoc:
      if @active_record_encodings.nil?
        klass = superclass

        while @active_record_encodings.nil? && klass != ActiveRecord::Base && klass != Object
          @active_record_encodings = klass.instance_variable_get(:@active_record_encodings)

          # Take the model-wide default from the ancestor that supplied the
          # table; otherwise subclasses would silently lose it.
          unless @active_record_encodings.nil?
            @active_record_external_encoding ||=
              klass.instance_variable_get(:@active_record_external_encoding)
          end

          klass = klass.superclass
        end

        raise "internal error -- can't find encodings" if @active_record_encodings.nil?
      end

      # Hash#fetch bypasses the table's default block, so a lookup never
      # creates an empty entry as a side effect.
      column_settings = @active_record_encodings.fetch(attr_name.to_s, nil)
      (column_settings && column_settings[:ext]) || @active_record_external_encoding
    end


    #
    # Define the attribute reader through the original definer, then, when
    # an encoding applies to +attr_name+, wrap that reader so its result is
    # passed through #encoding_aware_attribute_cast!.
    #
    def encoding_aware_define_read_method(symbol, attr_name, column) #:nodoc:
      pre_encoding_aware_define_read_method(symbol, attr_name, column)
      return if active_record_external_encoding(attr_name).nil?

      method_name = "encoding_aware_attr_#{symbol}".to_sym
      old_method_name = "pre_#{method_name}".to_sym
      code = <<-__EOM__
        encoding_aware_attribute_cast!(#{attr_name.inspect}, #{old_method_name})
      __EOM__
      evaluate_attribute_method attr_name, "def #{method_name}; #{code}; end"

      # Keep the untouched reader under the pre_ name, then point the
      # public reader at the casting wrapper.
      alias_method old_method_name, symbol
      alias_method symbol, method_name
    end

  end # ActiveRecordEncoding::ExtendedClassMethods
end
|
130
|
+
|
131
|
+
|
132
|
+
#
|
133
|
+
# IncludedInstanceMethods defines instance methods for inclusion in
|
134
|
+
# models sub-classed from ActiveRecord::Base to do the dirty work. It
|
135
|
+
# is only included in models that use ActiveRecordEncoding.
|
136
|
+
#
|
137
|
+
module ActiveRecordEncoding
  #
  # Instance-level helpers for models that declare an external encoding.
  #
  module IncludedInstanceMethods

    #
    # Hook run when the module is included into +model_class+.  It gives
    # the class its own encodings table and reroutes define_read_method,
    # read_attribute and write_attribute through the encoding-aware
    # versions, keeping the originals under pre_encoding_aware_* names.
    #
    # Ruby runs this hook on every +include+, even when an ancestor already
    # included the module.  A subclass of a wrapped model has no table of
    # its own, so without the checks below the aliases would be installed a
    # second time on top of the inherited wrappers, making each
    # pre_encoding_aware_* method point at the wrapper itself and recurse
    # without end.  The wrappers are therefore installed only once per
    # inheritance chain; such a subclass still receives a fresh, empty
    # table.
    #
    def self.included(model_class) #:nodoc:
      return if model_class.instance_variable_get(:@active_record_encodings)

      unless model_class.respond_to?(:pre_encoding_aware_define_read_method, true)
        class << model_class
          alias_method :pre_encoding_aware_define_read_method, :define_read_method
          alias_method :define_read_method, :encoding_aware_define_read_method
        end
      end

      already_wrapped =
        model_class.method_defined?(:pre_encoding_aware_read_attribute) ||
        model_class.private_method_defined?(:pre_encoding_aware_read_attribute)

      model_class.class_eval do
        @active_record_encodings = Hash.new { |h, k| h[k] = Hash.new }

        unless already_wrapped
          alias_method :pre_encoding_aware_read_attribute, :read_attribute
          alias_method :read_attribute, :encoding_aware_read_attribute
          alias_method :pre_encoding_aware_write_attribute, :write_attribute
          alias_method :write_attribute, :encoding_aware_write_attribute
        end
      end
    end

    #
    # Cast +value+ for attribute +attr_name+ and return it.
    #
    # Frozen values, and values already marked with
    # +@active_record_encoded+, are returned untouched.
    #
    # On ":read" operations the value is converted, in place, from the
    # attribute's external encoding to UTF-8 (nothing is converted when no
    # external encoding applies).  On ":write" operations a duplicate of
    # the value is tagged as 'UTF-8' if its encoding is "ASCII-8BIT"; the
    # bytes are not converted and the caller's object is left alone.
    #
    # The returned value is marked with +@active_record_encoded+ so it is
    # not cast a second time.  Any other +op+ raises RuntimeError.
    #
    def encoding_aware_attribute_cast!(attr_name, value, op = :read) #:nodoc:
      return value if value.frozen? ||
                      value.instance_variable_get(:@active_record_encoded)

      case op
      when :read
        ext_encoding = self.class.active_record_external_encoding(attr_name)
        if ext_encoding
          # Check for the method that is actually called: some objects
          # report an #encoding without being re-taggable.
          if value.respond_to?(:force_encoding)
            value.force_encoding(ext_encoding).encode!('UTF-8')
          elsif value.respond_to?(:mb_chars)
            value.replace Iconv.conv('UTF-8', ext_encoding, value)
          end
        end

      when :write
        if value.respond_to?(:force_encoding)
          value = value.dup
          value.force_encoding('UTF-8') if value.encoding.name == 'ASCII-8BIT'
        end

      else
        raise "invalid operation"
      end

      value.instance_variable_set(:@active_record_encoded, true)
      value
    end

    # Normal replacement for read_attribute: read through the original
    # method, then cast the result for reading.
    def pure_encoding_aware_read_attribute(attr_name) #:nodoc:
      value = pre_encoding_aware_read_attribute(attr_name)
      encoding_aware_attribute_cast!(attr_name, value)
    end
    private :pure_encoding_aware_read_attribute


    # Replacement for read_attribute when Rails is preparing data for
    # write: the value is read as usual and, when an external encoding
    # applies, a copy transcoded to that encoding is returned (tagged
    # "ASCII-8BIT" where Strings carry an encoding).
    def encoding_aware_read_attribute_for_write(attr_name) #:nodoc:
      value = pure_encoding_aware_read_attribute(attr_name)

      ext_encoding = self.class.active_record_external_encoding(attr_name)
      return value unless ext_encoding

      if value.respond_to?(:force_encoding)
        value.encode(ext_encoding).force_encoding('ASCII-8BIT')
      elsif value.respond_to?(:mb_chars)
        Iconv.conv(ext_encoding, 'UTF-8', value)
      else
        value
      end
    end
    private :encoding_aware_read_attribute_for_write


    def encoding_aware_read_attribute(attr_name) #:nodoc:
      # We need to behave differently if called from
      # #attributes_with_quotes because that is how Rails knows what value
      # to write out.  Doing it this way is an unfortunate kludge.
      #
      # The pattern accepts both backtrace label styles: the older
      # `attributes_with_quotes' and the newer 'Class#attributes_with_quotes'.
      # NOTE(review): this inspects the full call stack on every attribute
      # read -- confirm the cost is acceptable for your workload.
      writing = caller.any? do |frame|
        frame =~ /[`'](?:[^\s`']*[#.])?attributes_with_quotes'$/
      end

      if writing
        encoding_aware_read_attribute_for_write(attr_name)
      else
        pure_encoding_aware_read_attribute(attr_name)
      end
    end


    # We need to replace write_attribute so that we can set
    # +@active_record_encoded+ to +true+ on the value being passed in.
    # Otherwise the value is force_encoded according to the rules defined
    # by the user and it results in corrupted data.
    def encoding_aware_write_attribute(attr_name, value) #:nodoc:
      value = encoding_aware_attribute_cast!(attr_name, value, :write)
      pre_encoding_aware_write_attribute(attr_name, value)
    end

  end # ActiveRecordEncoding::IncludedInstanceMethods
end
|
240
|
+
|
241
|
+
|
242
|
+
ActiveRecord::Base.extend ActiveRecordEncoding::StandardClassMethods
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: active_record_encoding
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 10
|
8
|
+
- 1
|
9
|
+
version: 0.10.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Michael H. Buselli
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-09-11 00:00:00 -05:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: |
|
22
|
+
Library to monkey-patch ActiveRecord and add some Unicode awareness
|
23
|
+
|
24
|
+
email: cosine@cosine.org
|
25
|
+
executables: []
|
26
|
+
|
27
|
+
extensions: []
|
28
|
+
|
29
|
+
extra_rdoc_files: []
|
30
|
+
|
31
|
+
files:
|
32
|
+
- LICENSE
|
33
|
+
- lib/active_record_encoding.rb
|
34
|
+
has_rdoc: true
|
35
|
+
homepage: http://cosine.org/
|
36
|
+
licenses: []
|
37
|
+
|
38
|
+
post_install_message:
|
39
|
+
rdoc_options: []
|
40
|
+
|
41
|
+
require_paths:
|
42
|
+
- lib
|
43
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
segments:
|
48
|
+
- 0
|
49
|
+
version: "0"
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
segments:
|
55
|
+
- 0
|
56
|
+
version: "0"
|
57
|
+
requirements: []
|
58
|
+
|
59
|
+
rubyforge_project: active_record_encoding
|
60
|
+
rubygems_version: 1.3.6
|
61
|
+
signing_key:
|
62
|
+
specification_version: 3
|
63
|
+
summary: Library to monkey-patch ActiveRecord and add some Unicode awareness
|
64
|
+
test_files: []
|
65
|
+
|