active_record_encoding 0.10.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/LICENSE +21 -0
  2. data/lib/active_record_encoding.rb +242 -0
  3. metadata +65 -0
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2009, Michael H. Buselli
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+ * Redistributions of source code must retain the above copyright
7
+ notice, this list of conditions and the following disclaimer.
8
+ * Redistributions in binary form must reproduce the above copyright
9
+ notice, this list of conditions and the following disclaimer in the
10
+ documentation and/or other materials provided with the distribution.
11
+
12
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ''AS IS'' AND ANY
13
+ EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
16
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,242 @@
1
+ #
2
+ # Copyright (c) 2009, Michael H. Buselli
3
+ # See LICENSE for details. All other rights reserved.
4
+ #
5
+ #######
6
+
7
+ #
8
+ # ActiveRecordEncoding — Module to make ActiveRecord aware of Unicode
9
+ # encoding issues. It should be used only if the underlying database
10
+ # and its driver do not or cannot properly handle the encoding of the
11
+ # data it returns (usually as "ASCII-8BIT"). Most databases can
12
+ # properly encode data, however, so your first assumption should be that
13
+ # you do not need this software unless you really know you need it.
14
+ #
15
+ # ActiveRecordEncoding keeps a variable for each column and table
16
+ # where encoding is requested so it knows how the data is encoded in the
17
+ # database. This variable is called its external_encoding.
18
+ #
19
+ # External encodings must be defined for each column or table where
20
+ # a translation is to occur, and this is done in the model definition:
21
+ #
22
+ # class User < ActiveRecord::Base
23
+ # external_encoding 'ISO-8859-1', :for => :comment
24
+ # external_encoding 'ISO-8859-1', :for => [:first_name, :last_name]
25
+ # end
26
+ #
27
+ # Data is converted to UTF-8 when passed to the user.
28
+ #
29
+ # When data is being saved back to the database, it is assumed that the
30
+ # data is in UTF-8; on Ruby 1.9, if the String's encoding value is
30
+ # something other than "ASCII-8BIT", that encoding is used instead.
32
+ #
33
+ module ActiveRecordEncoding # namespace only; the nested modules carry the behaviour
34
+ end # ActiveRecordEncoding
35
+
36
+
37
+ # Load Iconv only when String objects don't know about #encoding, i.e. on
38
+ # Ruby 1.8, where Iconv.conv performs the conversions instead of String#encode.
39
+ if not ''.respond_to? :encoding
40
+ require 'iconv'
41
+ end
42
+
43
+
44
+ #
45
+ # StandardClassMethods defines class methods for inclusion in
46
+ # ActiveRecord::Base in order to provide the user interface for
47
+ # ActiveRecordEncoding.
48
+ #
49
+ module ActiveRecordEncoding::StandardClassMethods
50
+
51
+ #
52
+ # Set the external_encoding value for this model class. Each call also extends the model with ExtendedClassMethods and includes IncludedInstanceMethods.
53
+ #
54
+ # class User < ActiveRecord::Base
55
+ # external_encoding 'ISO-8859-1' # affect all binary columns
56
+ # end
57
+ #
58
+ # When data is retrieved from the database, it will be assumed it is
59
+ # encoded in the given format. Without :for, the value is stored as the class-wide default (@active_record_external_encoding).
60
+ #
61
+ # This may also be called with the :for option pointing to one or more
62
+ # specific columns that this call applies to (a single name or an Array; each is stored under its String name):
63
+ #
64
+ # class User < ActiveRecord::Base
65
+ # external_encoding 'ISO-8859-1', :for => :comment
66
+ # external_encoding 'ISO-8859-1', :for => [:first_name, :last_name]
67
+ # end
68
+ #
69
+ def external_encoding (new_encoding, options = {})
70
+ extend ActiveRecordEncoding::ExtendedClassMethods
71
+ include ActiveRecordEncoding::IncludedInstanceMethods
72
+
73
+ if attr_names = options[:for]
74
+ [*attr_names].each do |attr_name|
75
+ @active_record_encodings[attr_name.to_s][:ext] = new_encoding
76
+ end
77
+ else
78
+ @active_record_external_encoding = new_encoding
79
+ end
80
+ end
81
+
82
+
83
+ # With the removal of #internal_encoding, the #encoding method is now
84
+ # identical to #external_encoding.
85
+ alias encoding external_encoding
86
+
87
+ end # ActiveRecordEncoding::StandardClassMethods
88
+
89
+
90
+ #
91
+ # ExtendedClassMethods defines class methods that are extended onto
92
+ # models sub-classed from ActiveRecord::Base to do the dirty work. It
93
+ # is only extended onto models that use ActiveRecordEncoding.
94
+ #
95
+ module ActiveRecordEncoding::ExtendedClassMethods
96
+
97
+ def active_record_external_encoding (attr_name = nil) #:nodoc:
98
+ if @active_record_encodings.nil?
99
+ klass = self.superclass
100
+ while @active_record_encodings.nil? and klass != ActiveRecord::Base and klass != Object
101
+ @active_record_encodings = klass.instance_variable_get(:@active_record_encodings)
102
+ klass = klass.superclass
103
+ end
104
+
105
+ if @active_record_encodings.nil?
106
+ raise "internal error -- can't find encodings"
107
+ end
108
+ end
109
+
110
+ @active_record_encodings[attr_name][:ext] ||
111
+ @active_record_external_encoding # class-wide default; NOTE(review): read from this class only -- the ancestor walk above copies just the per-column table, confirm that is intended
112
+ end
113
+
114
+
115
+ # Redefine the attribute read method to do the conversion: build the stock reader first, then, if an external encoding applies to attr_name, keep it as pre_encoding_aware_attr_<symbol> and alias <symbol> to a generated reader that passes its result through encoding_aware_attribute_cast!.
116
+ def encoding_aware_define_read_method (symbol, attr_name, column) #:nodoc:
117
+ pre_encoding_aware_define_read_method(symbol, attr_name, column)
118
+ return if active_record_external_encoding(attr_name).nil?
119
+ method_name = "encoding_aware_attr_#{symbol}".to_sym
120
+ old_method_name = "pre_#{method_name}".to_sym
121
+ code = <<-__EOM__
122
+ encoding_aware_attribute_cast!(#{attr_name.inspect}, #{old_method_name})
123
+ __EOM__
124
+ evaluate_attribute_method attr_name, "def #{method_name}; #{code}; end"
125
+ alias_method "pre_#{method_name}".to_sym, symbol
126
+ alias_method symbol, method_name
127
+ end
128
+
129
+ end # ActiveRecordEncoding::ExtendedClassMethods
130
+
131
+
132
+ #
133
+ # IncludedInstanceMethods defines instance methods for inclusion in
134
+ # models sub-classed from ActiveRecord::Base to do the dirty work. It
135
+ # is only included in models that use ActiveRecordEncoding.
136
+ #
137
+ module ActiveRecordEncoding::IncludedInstanceMethods
138
+
139
+ def self.included (model_class) #:nodoc:
140
+ return if model_class.instance_variable_get(:@active_record_encodings) # this class is already wired up
141
+
142
+ class << model_class
143
+ alias_method :pre_encoding_aware_define_read_method, :define_read_method
144
+ alias_method :define_read_method, :encoding_aware_define_read_method
145
+ end
146
+
147
+ model_class.class_eval do
148
+ @active_record_encodings = Hash.new { |h, k| h[k] = Hash.new } # per-attribute settings; unknown keys get a fresh empty Hash
149
+ alias_method :pre_encoding_aware_read_attribute, :read_attribute
150
+ alias_method :read_attribute, :encoding_aware_read_attribute
151
+ alias_method :pre_encoding_aware_write_attribute, :write_attribute
152
+ alias_method :write_attribute, :encoding_aware_write_attribute
153
+ end
154
+ end
155
+
156
+ # Method that casts the Binary data into Unicode, if necessary. Frozen
157
+ # values and values already flagged with @active_record_encoded are
158
+ # returned untouched. On ":read" operations the value is converted from
159
+ # the external encoding to UTF-8 in place. On ":write" operations a
160
+ # duplicate is tagged as 'UTF-8' when its encoding is "ASCII-8BIT"; the
161
+ # bytes are not converted. Any other op raises "invalid operation".
162
+ def encoding_aware_attribute_cast! (attr_name, value, op = :read) #:nodoc:
163
+ if not value.frozen? and
164
+ not value.instance_variable_get(:@active_record_encoded) \
165
+ then
166
+
167
+ if op == :read
168
+ if ext_encoding = self.class.active_record_external_encoding(attr_name)
169
+ if value.respond_to? :encoding
170
+ value.force_encoding(ext_encoding).encode!('UTF-8')
171
+ elsif value.respond_to? :mb_chars
172
+ value.replace Iconv.conv('UTF-8', ext_encoding, value)
173
+ end
174
+ end
175
+
176
+ elsif op == :write
177
+ if value.respond_to? :encoding
178
+ (value = value.dup) rescue nil # best effort: keep working on the original if it cannot be duplicated
179
+ value.force_encoding('UTF-8') if value.encoding.name == 'ASCII-8BIT'
180
+ end
181
+
182
+ else
183
+ raise "invalid operation"
184
+ end
185
+
186
+ value.instance_variable_set(:@active_record_encoded, true)
187
+ end
188
+
189
+ value
190
+ end
191
+
192
+ # Normal replacement method for read_attribute: fetches the value through the original read_attribute and casts it for reading.
193
+ def pure_encoding_aware_read_attribute (attr_name) #:nodoc:
194
+ value = pre_encoding_aware_read_attribute(attr_name)
195
+ encoding_aware_attribute_cast!(attr_name, value)
196
+ end
197
+ private :pure_encoding_aware_read_attribute
198
+
199
+
200
+ # Replacement method for read_attribute when Rails is preparing data
201
+ # for write: the UTF-8 value is converted back to the external encoding (and tagged "ASCII-8BIT" where String#encoding exists).
202
+ def encoding_aware_read_attribute_for_write (attr_name) #:nodoc:
203
+ value = pure_encoding_aware_read_attribute(attr_name)
204
+
205
+ if ext_encoding = self.class.active_record_external_encoding(attr_name)
206
+ if value.respond_to? :encoding
207
+ value = value.encode(ext_encoding).force_encoding('ASCII-8BIT')
208
+ elsif value.respond_to? :mb_chars
209
+ value = Iconv.conv(ext_encoding, 'UTF-8', value)
210
+ end
211
+ end
212
+
213
+ value
214
+ end
215
+ private :encoding_aware_read_attribute_for_write
216
+
217
+
218
+ def encoding_aware_read_attribute (attr_name) #:nodoc:
219
+ # We need to behave differently if called from
220
+ # #attributes_with_quotes because that is how Rails knows what value
221
+ # to write out. Doing it this way is an unfortunate kludge. NOTE(review): the pattern relies on the backtick-quoted frame format of Kernel#caller -- confirm on the Ruby version in use.
222
+ if caller.grep(/`attributes_with_quotes'$/).empty?
223
+ pure_encoding_aware_read_attribute(attr_name)
224
+ else
225
+ encoding_aware_read_attribute_for_write(attr_name)
226
+ end
227
+ end
228
+
229
+
230
+ # We need to replace write_attribute so that we can set
231
+ # +@active_record_encoded+ to +true+ on the value being passed in.
232
+ # Otherwise the value is force_encoded according to the rules defined
233
+ # by the user and it results in corrupted data.
234
+ def encoding_aware_write_attribute (attr_name, value) #:nodoc:
235
+ value = encoding_aware_attribute_cast!(attr_name, value, :write)
236
+ pre_encoding_aware_write_attribute(attr_name, value)
237
+ end
238
+
239
+ end # ActiveRecordEncoding::IncludedInstanceMethods
240
+
241
+
242
+ ActiveRecord::Base.extend ActiveRecordEncoding::StandardClassMethods # exposes external_encoding / encoding as class-level methods on ActiveRecord::Base
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: active_record_encoding
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 10
8
+ - 1
9
+ version: 0.10.1
10
+ platform: ruby
11
+ authors:
12
+ - Michael H. Buselli
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-09-11 00:00:00 -05:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description: |
22
+ Library to monkey-patch ActiveRecord and add some Unicode awareness
23
+
24
+ email: cosine@cosine.org
25
+ executables: []
26
+
27
+ extensions: []
28
+
29
+ extra_rdoc_files: []
30
+
31
+ files:
32
+ - LICENSE
33
+ - lib/active_record_encoding.rb
34
+ has_rdoc: true
35
+ homepage: http://cosine.org/
36
+ licenses: []
37
+
38
+ post_install_message:
39
+ rdoc_options: []
40
+
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ segments:
48
+ - 0
49
+ version: "0"
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ segments:
55
+ - 0
56
+ version: "0"
57
+ requirements: []
58
+
59
+ rubyforge_project: active_record_encoding
60
+ rubygems_version: 1.3.6
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: Library to monkey-patch ActiveRecord and add some Unicode awareness
64
+ test_files: []
65
+