rightmove_wrangler 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,3 @@
1
1
  module RightmoveWrangler
2
- VERSION = "0.1.8"
2
+ VERSION = "0.1.9"
3
3
  end
@@ -96,7 +96,7 @@ module RightmoveWrangler
96
96
  end
97
97
 
98
98
  def work_blm_file(file)
99
- blm = BLM::Document.new( File.open(file, "r").read )
99
+ blm = BLM::Document.new( force_convert_utf8(File.open(file, "r").read) )
100
100
  rows = blm.data.collect do |row|
101
101
  row_hash = {}
102
102
  row.attributes.each do |key, value|
@@ -209,5 +209,21 @@ module RightmoveWrangler
209
209
  end
210
210
  end
211
211
  end
212
+
213
# Returns a valid UTF-8 copy of +string+, never modifying the argument.
#
# The bytes are first reinterpreted as UTF-8. If that yields a valid
# string it is returned as-is. Otherwise the bytes are decoded as
# Windows-1252 and transcoded to UTF-8; the few byte values Windows-1252
# leaves undefined are substituted with the replacement character
# instead of raising, so one stray byte no longer discards every other
# non-ASCII character in the input.
#
# @param string [String] raw text in an unknown encoding
# @return [String] a new, validly encoded UTF-8 string
def force_convert_utf8(string)
  # Work on a copy: force_encoding retags the receiver in place.
  as_utf8 = string.dup.force_encoding('UTF-8')
  return as_utf8 if as_utf8.valid_encoding?

  # Naming the source encoding explicitly makes the result independent of
  # whatever encoding the incoming string happened to be tagged with.
  string.encode('UTF-8', 'Windows-1252', invalid: :replace, undef: :replace)
end
212
228
 
213
229
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rightmove_wrangler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.1.9
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: