pluto-update 1.1.1 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b4f3fca682131a8aa9d60081466b12c4ec0b6cc9
4
- data.tar.gz: 040a6c5e4fa0dede47c35da2217d890bf2791cae
3
+ metadata.gz: 39836180edc940c1dd06d5161c18d7991c551715
4
+ data.tar.gz: 74c2f94f4da7e426bdc7c806672f9625dc35d081
5
5
  SHA512:
6
- metadata.gz: 9d95b94f9e0ef5b9554c14af12be6bc04641405209299a4b069665710e90b5bf6b98138c3c25b6ed5e6f0d8f0bf401c6fb5009c95c5378f83c062bf4640c6979
7
- data.tar.gz: 275a2b93af9508f35f3c57f76cc3fa7db2024a4f5f0362427ac56e3c662196fb97579900bbeae3997399b6fc44eb07f1e3e5bd9195ff74c6c738da29933d07d4
6
+ metadata.gz: adf0ec25e897b3eb3d0f1f44fe9178b4303eec1b535bf9ac98390002e87f3cc86e0208b3284a43215842053fdcc5db5ebe5eb99417badb0085aa806e0ee1a7ba
7
+ data.tar.gz: 7b8035080c0d059e14c455a4c9591c50b5966904429e9149e2c35512e671d869b15c6322cad6321dbf3293cd89d2c10e47cd652da156f0544b09d111d2e5a48e
@@ -144,13 +144,44 @@ class Fetcher
144
144
  # - try to change encoding to UTF-8 ourselves
145
145
  logger.debug "feed_xml.encoding.name (before): #{feed_xml.encoding.name}"
146
146
 
147
+
147
148
  #####
148
149
  # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
149
150
 
151
+ # try converting ASCII-8BIT to UTF-8 based on domain-specific guesses
152
+ begin
153
+ # Try it as UTF-8 directly
154
+ # Note: make a copy/dup - otherwise convert fails (because string is already changed/corrupted)
155
+ feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::UTF_8 )
156
+ unless feed_xml_cleaned.valid_encoding?
157
+
158
+ puts "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1"
159
+ Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1" )
160
+ # Some of it might be old Windows code page
161
+ # -- (NB: Windows code page CP1252 is close to, but not identical to, ISO_8859_1 / Latin-1 - they differ in 0x80-0x9F - check ??)
162
+
163
+ # tell ruby the encoding
164
+ # encode to utf-8
165
+ ## use all in code encode ?? e.g. feed_xml_cleaned = feed_xml.encode( Encoding::UTF_8, Encoding::ISO_8859_1 )
166
+ feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::ISO_8859_1 ).encode( Encoding::UTF_8 )
167
+ end
168
+ feed_xml = feed_xml_cleaned
169
+ rescue EncodingError => e
170
+ puts "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}"
171
+ Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}" )
172
+
173
+ # Force it to UTF-8, throwing out invalid bits
174
+ ## todo: check options - add ?? or something to mark invalid chars ???
175
+ feed_xml.encode!( Encoding::UTF_8, :invalid => :replace, :undef => :replace )
176
+ end
177
+
150
178
  ## NB:
151
179
  # for now "hardcoded" to utf8 - what else can we do?
152
180
  # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
153
- feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
181
+ ### old "simple" version
182
+ ## feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
183
+
184
+
154
185
  logger.debug "feed_xml.encoding.name (after): #{feed_xml.encoding.name}"
155
186
 
156
187
  ## check for md5 hash for response.body
@@ -186,7 +217,7 @@ class Fetcher
186
217
 
187
218
  ### note: might crash w/ encoding errors when saving in postgres
188
219
  ## e.g. PG::CharacterNotInRepertoire: ERROR: ...
189
- ## catch error, log it and continue for now
220
+ ## catch error, log it and stop for now
190
221
  #
191
222
  # in the future check for different charset than utf-8 ?? possible?? how to deal with non-utf8 charsets??
192
223
 
@@ -196,6 +227,7 @@ class Fetcher
196
227
  # log db error; and stop (return nil - see below)
197
228
  puts "*** error: updating feed database record '#{feed_key}' - #{e.to_s}"
198
229
  Activity.create!( text: "*** error: updating feed database record '#{feed_key}' - #{e.to_s}" )
230
+ return nil # sorry; corrupt feed; parsing not possible; fix char encoding - make it an option in config??
199
231
  end
200
232
 
201
233
 
@@ -75,7 +75,9 @@ class Subscriber
75
75
  title: feed_hash[ 'title' ] || feed_hash[ 'name' ],
76
76
  title2: feed_hash[ 'title2' ],
77
77
  includes: feed_hash[ 'includes' ] || feed_hash[ 'include' ],
78
- excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ]
78
+ excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ],
79
+ ## todo/future: add option for adding encoding - might not always be utf8 !!!!
80
+ ## encoding: feed_hash[ 'encoding' ] || feed_hash[ 'charset' ] || 'utf8', ## default to utf8
79
81
  }
80
82
 
81
83
  puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."
@@ -5,7 +5,7 @@ module PlutoUpdate
5
5
 
6
6
  MAJOR = 1
7
7
  MINOR = 1
8
- PATCH = 1
8
+ PATCH = 2
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
11
11
  def self.version
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pluto-update
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer