rightmove_wrangler 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rightmove_wrangler/version.rb +1 -1
- data/lib/rightmove_wrangler.rb +17 -1
- metadata +1 -1
data/lib/rightmove_wrangler.rb
CHANGED
@@ -96,7 +96,7 @@ module RightmoveWrangler
|
|
96
96
|
end
|
97
97
|
|
98
98
|
def work_blm_file(file)
|
99
|
-
blm = BLM::Document.new( File.open(file, "r").read )
|
99
|
+
blm = BLM::Document.new( force_convert_utf8(File.open(file, "r").read) )
|
100
100
|
rows = blm.data.collect do |row|
|
101
101
|
row_hash = {}
|
102
102
|
row.attributes.each do |key, value|
|
@@ -209,5 +209,21 @@ module RightmoveWrangler
|
|
209
209
|
end
|
210
210
|
end
|
211
211
|
end
|
212
|
+
|
213
|
+
# Returns a UTF-8 copy of +string+ without modifying the argument.
#
# Strategy, in order:
# 1. Re-tag a copy of the bytes as UTF-8; if that is a valid encoding,
#    return the copy as-is.
# 2. Otherwise transcode the string to UTF-8, treating its bytes as
#    Windows-1252.
# 3. If that transcoding raises an EncodingError, fall back to a lossy
#    conversion that substitutes invalid/undefined bytes.
#
# @param string [String] text whose real encoding is unknown
# @return [String] a new string encoded as UTF-8
def force_convert_utf8(string)
  # Try it as UTF-8 directly
  cleaned = string.dup.force_encoding('UTF-8')
  return cleaned if cleaned.valid_encoding?

  # Some of it might be old Windows code page
  string.encode('UTF-8', 'Windows-1252')
rescue EncodingError
  # Force it to UTF-8, throwing out invalid bits. The non-bang #encode is
  # used on purpose: #encode! would rewrite the caller's string in place
  # and raise FrozenError (not an EncodingError) on frozen input.
  string.encode('UTF-8', invalid: :replace, undef: :replace)
end
|
212
228
|
|
213
229
|
end
|