pluto-update 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b4f3fca682131a8aa9d60081466b12c4ec0b6cc9
4
- data.tar.gz: 040a6c5e4fa0dede47c35da2217d890bf2791cae
3
+ metadata.gz: 39836180edc940c1dd06d5161c18d7991c551715
4
+ data.tar.gz: 74c2f94f4da7e426bdc7c806672f9625dc35d081
5
5
  SHA512:
6
- metadata.gz: 9d95b94f9e0ef5b9554c14af12be6bc04641405209299a4b069665710e90b5bf6b98138c3c25b6ed5e6f0d8f0bf401c6fb5009c95c5378f83c062bf4640c6979
7
- data.tar.gz: 275a2b93af9508f35f3c57f76cc3fa7db2024a4f5f0362427ac56e3c662196fb97579900bbeae3997399b6fc44eb07f1e3e5bd9195ff74c6c738da29933d07d4
6
+ metadata.gz: adf0ec25e897b3eb3d0f1f44fe9178b4303eec1b535bf9ac98390002e87f3cc86e0208b3284a43215842053fdcc5db5ebe5eb99417badb0085aa806e0ee1a7ba
7
+ data.tar.gz: 7b8035080c0d059e14c455a4c9591c50b5966904429e9149e2c35512e671d869b15c6322cad6321dbf3293cd89d2c10e47cd652da156f0544b09d111d2e5a48e
@@ -144,13 +144,44 @@ class Fetcher
144
144
  # - try to change encoding to UTF-8 ourselves
145
145
  logger.debug "feed_xml.encoding.name (before): #{feed_xml.encoding.name}"
146
146
 
147
+
147
148
  #####
148
149
  # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
149
150
 
151
+ # try converting ASCII-8BIT to UTF-8 based on domain-specific guesses
152
+ begin
153
+ # Try it as UTF-8 directly
154
+ # Note: make a copy/dup - otherwise convert fails (because string is already changed/corrupted)
155
+ feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::UTF_8 )
156
+ unless feed_xml_cleaned.valid_encoding?
157
+
158
+ puts "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1"
159
+ Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1" )
160
+ # Some of it might be old Windows code page
161
+ # -- (note: Windows Code Page CP1252 is close to, but NOT identical to, ISO_8859_1 / Latin-1 - they differ in bytes 0x80-0x9F)
162
+
163
+ # tell ruby the encoding
164
+ # encode to utf-8
165
+ ## use all in code encode ?? e.g. feed_xml_cleaned = feed_xml.encode( Encoding::UTF_8, Encoding::ISO_8859_1 )
166
+ feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::ISO_8859_1 ).encode( Encoding::UTF_8 )
167
+ end
168
+ feed_xml = feed_xml_cleaned
169
+ rescue EncodingError => e
170
+ puts "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}"
171
+ Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}" )
172
+
173
+ # Force it to UTF-8, throwing out invalid bits
174
+ ## todo: check options - add ?? or something to mark invalid chars ???
175
+ feed_xml.encode!( Encoding::UTF_8, :invalid => :replace, :undef => :replace )
176
+ end
177
+
150
178
  ## NB:
151
179
  # for now "hardcoded" to utf8 - what else can we do?
152
180
  # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
153
- feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
181
+ ### old "simple" version
182
+ ## feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
183
+
184
+
154
185
  logger.debug "feed_xml.encoding.name (after): #{feed_xml.encoding.name}"
155
186
 
156
187
  ## check for md5 hash for response.body
@@ -186,7 +217,7 @@ class Fetcher
186
217
 
187
218
  ### note: might crash w/ encoding errors when saving in postgres
188
219
  ## e.g. PG::CharacterNotInRepertoire: ERROR: ...
189
- ## catch error, log it and continue for now
220
+ ## catch error, log it and stop for now
190
221
  #
191
222
  # in the future check for different charset than utf-8 ?? possible?? how to deal with non-utf8 charsets??
192
223
 
@@ -196,6 +227,7 @@ class Fetcher
196
227
  # log db error; and stop (returns nil - feed cannot be parsed)
197
228
  puts "*** error: updating feed database record '#{feed_key}' - #{e.to_s}"
198
229
  Activity.create!( text: "*** error: updating feed database record '#{feed_key}' - #{e.to_s}" )
230
+ return nil # sorry; corrupt feed; parsing not possible; fix char encoding - make it an option in config??
199
231
  end
200
232
 
201
233
 
@@ -75,7 +75,9 @@ class Subscriber
75
75
  title: feed_hash[ 'title' ] || feed_hash[ 'name' ],
76
76
  title2: feed_hash[ 'title2' ],
77
77
  includes: feed_hash[ 'includes' ] || feed_hash[ 'include' ],
78
- excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ]
78
+ excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ],
79
+ ## todo/future: add option for adding encoding - might not always be utf8 !!!!
80
+ ## encoding: feed_hash[ 'encoding' ] || feed_hash[ 'charset' ] || 'utf8', ## default to utf8
79
81
  }
80
82
 
81
83
  puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."
@@ -5,7 +5,7 @@ module PlutoUpdate
5
5
 
6
6
  MAJOR = 1
7
7
  MINOR = 1
8
- PATCH = 1
8
+ PATCH = 2
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
11
11
  def self.version
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pluto-update
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer