active_record_encoding 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/LICENSE +21 -0
  2. data/lib/active_record_encoding.rb +242 -0
  3. metadata +65 -0
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2009, Michael H. Buselli
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+ * Redistributions of source code must retain the above copyright
7
+ notice, this list of conditions and the following disclaimer.
8
+ * Redistributions in binary form must reproduce the above copyright
9
+ notice, this list of conditions and the following disclaimer in the
10
+ documentation and/or other materials provided with the distribution.
11
+
12
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ''AS IS'' AND ANY
13
+ EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
16
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/lib/active_record_encoding.rb ADDED
@@ -0,0 +1,242 @@
1
+ #
2
+ # Copyright (c) 2009, Michael H. Buselli
3
+ # See LICENSE for details. All other rights reserved.
4
+ #
5
+ #######
6
+
7
+ #
8
+ # ActiveRecordEncoding — Module to make ActiveRecord aware of Unicode
9
+ # encoding issues. It should be used only if the underlying database
10
+ # and its driver do not or cannot properly handle the encoding of the
11
+ # data it returns (usually as "ASCII-8BIT"). Most databases can
12
+ # properly encode data, however, so your first assumption should be that
13
+ # you do not need this software unless you really know you need it.
14
+ #
15
+ # ActiveRecordEncoding keeps a variable for each column and table
16
+ # where encoding is requested so it knows how the data is encoded in the
17
+ # database. This variable is called its external_encoding.
18
+ #
19
+ # External encodings must be defined for each column or table where
20
+ # a translation is to occur, and this is done in the model definition:
21
+ #
22
+ # class User < ActiveRecord::Base
23
+ # external_encoding 'ISO-8859-1', :for => :comment
24
+ # external_encoding 'ISO-8859-1', :for => [:first_name, :last_name]
25
+ # end
26
+ #
27
+ # Data is converted to UTF-8 when passed to the user.
28
+ #
29
+ # When data is being saved back to the database, it is assumed that the
30
+ # data is in UTF-8, or on Ruby 1.9 if the String's encoding value is
31
+ # something other than "ASCII-8BIT" it uses that encoding.
32
+ #
33
+ module ActiveRecordEncoding
34
+ end
35
+
36
+
37
+ # Use Iconv if String objects don't know about #encoding. This is so
38
+ # the library functions on Ruby 1.8.
39
+ if not ''.respond_to? :encoding
40
+ require 'iconv'
41
+ end
42
+
43
+
44
+ #
45
+ # StandardClassMethods defines class methods for inclusion in
46
+ # ActiveRecord::Base in order to provide the user interface for
47
+ # ActiveRecordEncoding.
48
+ #
49
+ module ActiveRecordEncoding::StandardClassMethods
50
+
51
+ #
52
+ # Set the external_encoding value for this model class.
53
+ #
54
+ # class User < ActiveRecord::Base
55
+ # external_encoding 'ISO-8859-1' # affect all binary columns
56
+ # end
57
+ #
58
+ # When data is retrieved from the database, it will be assumed it is
59
+ # encoded in the given format.
60
+ #
61
+ # This may also be called with the :for option pointing to one or more
62
+ # specific columns that this call applies to:
63
+ #
64
+ # class User < ActiveRecord::Base
65
+ # external_encoding 'ISO-8859-1', :for => :comment
66
+ # external_encoding 'ISO-8859-1', :for => [:first_name, :last_name]
67
+ # end
68
+ #
69
def external_encoding(new_encoding, options = {})
  # Mix the support modules into this model when an encoding is declared,
  # so only models that call this method are extended.
  extend ActiveRecordEncoding::ExtendedClassMethods
  include ActiveRecordEncoding::IncludedInstanceMethods

  columns = options[:for]

  # Without :for, the encoding becomes the model-wide default.
  return @active_record_external_encoding = new_encoding unless columns

  # With :for, record the encoding under each column name (as a String).
  [*columns].each do |column|
    @active_record_encodings[column.to_s][:ext] = new_encoding
  end
end
81
+
82
+
83
+ # With the removal of #internal_encoding, the #encoding method is now
84
+ # identical to #external_encoding.
85
+ alias encoding external_encoding
86
+
87
+ end # ActiveRecordEncoding::StandardClassMethods
88
+
89
+
90
+ #
91
+ # ExtendedClassMethods defines class methods for inclusion in
92
+ # models sub-classed from ActiveRecord::Base to do the dirty work. It
93
+ # is only included in models that use ActiveRecordEncoding.
94
+ #
95
+ module ActiveRecordEncoding::ExtendedClassMethods
96
+
97
# Look up the external encoding configured for +attr_name+ on this model.
#
# attr_name:: column name as a String or Symbol; may be omitted (nil) to
#             ask for the model-wide default only.
#
# Returns the encoding registered for that column, or the value of
# @active_record_external_encoding when the column has none (nil when
# that is unset too).
#
# Raises RuntimeError when neither this class nor any superclass below
# ActiveRecord::Base / Object carries an encodings table.
def active_record_external_encoding(attr_name = nil) #:nodoc:
  if @active_record_encodings.nil?
    # This class has no table of its own: adopt the one held by the
    # nearest superclass that has one.
    klass = superclass
    while @active_record_encodings.nil? && klass != ActiveRecord::Base && klass != Object
      @active_record_encodings = klass.instance_variable_get(:@active_record_encodings)
      klass = klass.superclass
    end

    raise "internal error -- can't find encodings" if @active_record_encodings.nil?
  end

  # Column encodings are stored under String keys, so normalize the name;
  # otherwise a Symbol argument would miss the column-specific entry and
  # silently fall back to the model-wide default.
  @active_record_encodings[attr_name.to_s][:ext] || @active_record_external_encoding
end
113
+
114
+
115
+ # Redefine the attribute read method to do the conversion.
116
def encoding_aware_define_read_method(symbol, attr_name, column) #:nodoc:
  # Let the original implementation generate the plain reader first.
  pre_encoding_aware_define_read_method(symbol, attr_name, column)

  # Nothing to wrap when no encoding applies to this attribute.
  return if active_record_external_encoding(attr_name).nil?

  wrapper_name = "encoding_aware_attr_#{symbol}".to_sym
  original_name = "pre_#{wrapper_name}".to_sym

  # Body of the wrapper: call the preserved original reader and pass its
  # result through the cast.
  code = <<-__EOM__
    encoding_aware_attribute_cast!(#{attr_name.inspect}, #{original_name})
  __EOM__
  evaluate_attribute_method attr_name, "def #{wrapper_name}; #{code}; end"

  # Keep the generated reader reachable under the pre_ name, then point
  # the public reader name at the wrapper.
  alias_method original_name, symbol
  alias_method symbol, wrapper_name
end
128
+
129
+ end # ActiveRecordEncoding::ExtendedClassMethods
130
+
131
+
132
+ #
133
+ # IncludedInstanceMethods defines instance methods for inclusion in
134
+ # models sub-classed from ActiveRecord::Base to do the dirty work. It
135
+ # is only included in models that use ActiveRecordEncoding.
136
+ #
137
+ module ActiveRecordEncoding::IncludedInstanceMethods
138
+
139
# Hook run each time this module is included into a model class.
#
# Wraps define_read_method (class level) and read_attribute /
# write_attribute (instance level) with their encoding-aware versions,
# and gives +model_class+ its own table of per-column encodings.
#
# The wrapping is done only once per class hierarchy. Ruby invokes this
# hook again when a subclass of an already-patched model includes the
# module; aliasing a second time there would make the pre_encoding_aware_*
# names point at the encoding-aware wrappers themselves, which then call
# themselves without end.
def self.included(model_class) #:nodoc:
  # A class that already has its own table has been set up before.
  return if model_class.instance_variable_get(:@active_record_encodings)

  # The pre_ alias is visible through inheritance, so its presence tells
  # us an ancestor (or this class) has already been wrapped.
  already_wrapped =
    model_class.method_defined?(:pre_encoding_aware_read_attribute) ||
    model_class.private_method_defined?(:pre_encoding_aware_read_attribute)

  unless already_wrapped
    class << model_class
      alias_method :pre_encoding_aware_define_read_method, :define_read_method
      alias_method :define_read_method, :encoding_aware_define_read_method
    end

    model_class.class_eval do
      alias_method :pre_encoding_aware_read_attribute, :read_attribute
      alias_method :read_attribute, :encoding_aware_read_attribute
      alias_method :pre_encoding_aware_write_attribute, :write_attribute
      alias_method :write_attribute, :encoding_aware_write_attribute
    end
  end

  # Unknown column names lazily receive an empty settings hash.
  model_class.instance_variable_set(
    :@active_record_encodings, Hash.new { |h, k| h[k] = Hash.new }
  )
end
155
+
156
+ # Method that casts the Binary data into Unicode, if necessary. On
157
+ # ":read" operations the value is converted from the external encoding to
158
+ # UTF-8 and the operation happens to the value in place. On ":write"
159
+ # operations the value is cast to 'UTF-8' if no encoding is set, but
160
+ # data is not converted, and the operation happens on a duplicate
161
+ # object.
162
# attr_name:: column name, used to look up the external encoding on reads.
# value::     the attribute value; frozen values and values already tagged
#             with @active_record_encoded are returned untouched.
# op::        :read or :write; anything else raises RuntimeError.
#
# Returns +value+ itself for :read, and a tagged duplicate for :write when
# the value supports #force_encoding.
def encoding_aware_attribute_cast!(attr_name, value, op = :read) #:nodoc:
  # Reject unknown operations regardless of the state of the value.
  raise "invalid operation" unless op == :read || op == :write

  # Frozen values cannot be converted in place or tagged, and tagged
  # values have been handled already.
  return value if value.frozen? || value.instance_variable_get(:@active_record_encoded)

  if op == :read
    ext_encoding = self.class.active_record_external_encoding(attr_name)
    if ext_encoding
      if value.respond_to?(:force_encoding)
        # Label the bytes with the declared external encoding, then
        # transcode in place.
        value.force_encoding(ext_encoding).encode!('UTF-8')
      elsif value.respond_to?(:mb_chars)
        # Strings without encoding support (Ruby 1.8): go through Iconv.
        value.replace Iconv.conv('UTF-8', ext_encoding, value)
      end
    end
  elsif value.respond_to?(:force_encoding)
    # Work on a copy so the caller's object is left alone; only unlabelled
    # (binary) data is relabelled, and the bytes are never converted.
    value = value.dup
    value.force_encoding('UTF-8') if value.encoding == Encoding::ASCII_8BIT
  end

  # Tag the value so it is not cast a second time.
  value.instance_variable_set(:@active_record_encoded, true)
  value
end
191
+
192
+ # Normal replacement method for read_attribute.
193
def pure_encoding_aware_read_attribute(attr_name) #:nodoc:
  # Fetch through the preserved original reader, then cast for reading.
  encoding_aware_attribute_cast!(
    attr_name, pre_encoding_aware_read_attribute(attr_name)
  )
end
private :pure_encoding_aware_read_attribute
198
+
199
+
200
+ # Replacement method for read_attribute when Rails is preparing data
201
+ # for write.
202
def encoding_aware_read_attribute_for_write(attr_name) #:nodoc:
  value = pure_encoding_aware_read_attribute(attr_name)
  ext_encoding = self.class.active_record_external_encoding(attr_name)

  # No external encoding declared: hand the value back as read.
  return value unless ext_encoding

  if value.respond_to?(:encoding)
    # Transcode to the external encoding and label the result as binary.
    value.encode(ext_encoding).force_encoding('ASCII-8BIT')
  elsif value.respond_to?(:mb_chars)
    # Strings without encoding support: convert from UTF-8 via Iconv.
    Iconv.conv(ext_encoding, 'UTF-8', value)
  else
    value
  end
end
private :encoding_aware_read_attribute_for_write
216
+
217
+
218
def encoding_aware_read_attribute(attr_name) #:nodoc:
  # We need to behave differently if called from
  # #attributes_with_quotes because that is how Rails knows what value
  # to write out.  Doing it this way is an unfortunate kludge.
  #
  # The backtrace text differs between Ruby versions: older releases
  # print "in `attributes_with_quotes'", while Ruby 3.4 and later print
  # "in 'Some::Class#attributes_with_quotes'".  Accept both, but only
  # for a frame that is the method itself (not a block inside it).
  write_frame = /[`'](?:[\w:]+[#.])?attributes_with_quotes'$/

  if caller.any? { |frame| frame =~ write_frame }
    encoding_aware_read_attribute_for_write(attr_name)
  else
    pure_encoding_aware_read_attribute(attr_name)
  end
end
228
+
229
+
230
+ # We need to replace write_attribute so that we can set
231
+ # +@active_record_encoded+ to +true+ on the value being passed in.
232
+ # Otherwise the value is force_encoded according to the rules defined
233
+ # by the user and it results in corrupted data.
234
def encoding_aware_write_attribute(attr_name, value) #:nodoc:
  # Cast first, so the value handed to the original writer is the one
  # returned by the :write cast.
  prepared = encoding_aware_attribute_cast!(attr_name, value, :write)
  pre_encoding_aware_write_attribute(attr_name, prepared)
end
238
+
239
+ end # ActiveRecordEncoding::IncludedInstanceMethods
240
+
241
+
242
# Make external_encoding (and its alias, encoding) available as class
# methods on ActiveRecord::Base.
ActiveRecord::Base.extend(ActiveRecordEncoding::StandardClassMethods)
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: active_record_encoding
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 10
8
+ - 1
9
+ version: 0.10.1
10
+ platform: ruby
11
+ authors:
12
+ - Michael H. Buselli
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-09-11 00:00:00 -05:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description: |
22
+ Library to monkey-patch ActiveRecord and add some Unicode awareness
23
+
24
+ email: cosine@cosine.org
25
+ executables: []
26
+
27
+ extensions: []
28
+
29
+ extra_rdoc_files: []
30
+
31
+ files:
32
+ - LICENSE
33
+ - lib/active_record_encoding.rb
34
+ has_rdoc: true
35
+ homepage: http://cosine.org/
36
+ licenses: []
37
+
38
+ post_install_message:
39
+ rdoc_options: []
40
+
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ segments:
48
+ - 0
49
+ version: "0"
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ segments:
55
+ - 0
56
+ version: "0"
57
+ requirements: []
58
+
59
+ rubyforge_project: active_record_encoding
60
+ rubygems_version: 1.3.6
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: Library to monkey-patch ActiveRecord and add some Unicode awareness
64
+ test_files: []
65
+