rightmove_wrangler 0.1.8 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,3 @@
1
1
  module RightmoveWrangler
2
- VERSION = "0.1.8"
2
+ VERSION = "0.1.9"
3
3
  end
@@ -96,7 +96,7 @@ module RightmoveWrangler
96
96
  end
97
97
 
98
98
  def work_blm_file(file)
99
- blm = BLM::Document.new( File.open(file, "r").read )
99
+ blm = BLM::Document.new( force_convert_utf8(File.open(file, "r").read) )
100
100
  rows = blm.data.collect do |row|
101
101
  row_hash = {}
102
102
  row.attributes.each do |key, value|
@@ -209,5 +209,21 @@ module RightmoveWrangler
209
209
  end
210
210
  end
211
211
  end
212
+
213
# Returns a UTF-8 encoded copy of +string+ without modifying the argument.
#
# Conversion is attempted in three steps:
# 1. Reinterpret the bytes as UTF-8 and keep them if they are valid.
# 2. Otherwise transcode the bytes as Windows-1252.
# 3. If transcoding raises an EncodingError, transcode from the string's
#    own encoding and substitute a replacement character for every
#    invalid or undefined byte.
#
# @param string [String] raw text in an unknown encoding
# @return [String] a new string tagged as UTF-8
def force_convert_utf8(string)
  # Try it as UTF-8 directly; +dup+ keeps the caller's string untouched.
  utf8 = string.dup.force_encoding('UTF-8')
  return utf8 if utf8.valid_encoding?

  # Some of it might be old Windows code page
  string.encode('UTF-8', 'Windows-1252')
rescue EncodingError
  # Force it to UTF-8, throwing out invalid bits. Uses the non-bang
  # +encode+ so the argument is never mutated and frozen input is accepted.
  string.encode('UTF-8', invalid: :replace, undef: :replace)
end
212
228
 
213
229
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rightmove_wrangler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.1.9
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: