rightmove_wrangler 0.1.8 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rightmove_wrangler/version.rb +1 -1
- data/lib/rightmove_wrangler.rb +17 -1
- metadata +1 -1
data/lib/rightmove_wrangler.rb
CHANGED
@@ -96,7 +96,7 @@ module RightmoveWrangler
|
|
96
96
|
end
|
97
97
|
|
98
98
|
def work_blm_file(file)
|
99
|
-
blm = BLM::Document.new( File.open(file, "r").read )
|
99
|
+
blm = BLM::Document.new( force_convert_utf8(File.open(file, "r").read) )
|
100
100
|
rows = blm.data.collect do |row|
|
101
101
|
row_hash = {}
|
102
102
|
row.attributes.each do |key, value|
|
@@ -209,5 +209,21 @@ module RightmoveWrangler
|
|
209
209
|
end
|
210
210
|
end
|
211
211
|
end
|
212
|
+
|
213
|
+
# Returns a UTF-8 copy of +string+ without modifying the argument.
#
# The bytes are first interpreted as UTF-8 as-is. If they do not form valid
# UTF-8, they are transcoded from Windows-1252 instead; any byte that has no
# Windows-1252 mapping (e.g. 0x81) is replaced with U+FFFD rather than
# raising, so the remaining characters are still decoded correctly.
#
# @param string [String] raw text in an unknown encoding
# @return [String] a new UTF-8 tagged string
def force_convert_utf8(string)
  # Try it as UTF-8 directly (on a copy, so the caller's string keeps its tag).
  as_utf8 = string.dup.force_encoding('UTF-8')
  return as_utf8 if as_utf8.valid_encoding?

  # Some of it might be old Windows code page. Naming the source encoding
  # explicitly makes the result independent of how the input was tagged, and
  # the replace options drop only the bytes that are genuinely unmappable.
  # Non-bang encode is used so frozen input works and nothing is mutated.
  string.encode('UTF-8', 'Windows-1252', invalid: :replace, undef: :replace)
end
|
212
228
|
|
213
229
|
end
|