pluto-update 1.1.1 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/pluto/update/fetcher.rb +34 -2
- data/lib/pluto/update/subscriber.rb +3 -1
- data/lib/pluto/update/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 39836180edc940c1dd06d5161c18d7991c551715
|
4
|
+
data.tar.gz: 74c2f94f4da7e426bdc7c806672f9625dc35d081
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: adf0ec25e897b3eb3d0f1f44fe9178b4303eec1b535bf9ac98390002e87f3cc86e0208b3284a43215842053fdcc5db5ebe5eb99417badb0085aa806e0ee1a7ba
|
7
|
+
data.tar.gz: 7b8035080c0d059e14c455a4c9591c50b5966904429e9149e2c35512e671d869b15c6322cad6321dbf3293cd89d2c10e47cd652da156f0544b09d111d2e5a48e
|
data/lib/pluto/update/fetcher.rb
CHANGED
@@ -144,13 +144,44 @@ class Fetcher
|
|
144
144
|
# - try to change encoding to UTF-8 ourselves
|
145
145
|
logger.debug "feed_xml.encoding.name (before): #{feed_xml.encoding.name}"
|
146
146
|
|
147
|
+
|
147
148
|
#####
|
148
149
|
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
149
150
|
|
151
|
+
# try converting ASCII-8BIT to UTF-8 based on domain-specific guesses
|
152
|
+
begin
|
153
|
+
# Try it as UTF-8 directly
|
154
|
+
# Note: make a copy/dup - otherwise convert fails (because string is already changed/corrupted)
|
155
|
+
feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::UTF_8 )
|
156
|
+
unless feed_xml_cleaned.valid_encoding?
|
157
|
+
|
158
|
+
puts "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1"
|
159
|
+
Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1" )
|
160
|
+
# Some of it might be old Windows code page
|
161
|
+
# -- (note: Windows Code Page CP1252 matches ISO_8859_1 (Latin-1) except for bytes 0x80-0x9F - check ??)
|
162
|
+
|
163
|
+
# tell ruby the encoding
|
164
|
+
# encode to utf-8
|
165
|
+
## use all-in-one encode ?? e.g. feed_xml_cleaned = feed_xml.encode( Encoding::UTF_8, Encoding::ISO_8859_1 )
|
166
|
+
feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::ISO_8859_1 ).encode( Encoding::UTF_8 )
|
167
|
+
end
|
168
|
+
feed_xml = feed_xml_cleaned
|
169
|
+
rescue EncodingError => e
|
170
|
+
puts "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}"
|
171
|
+
Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}" )
|
172
|
+
|
173
|
+
# Force it to UTF-8, throwing out invalid bits
|
174
|
+
## todo: check options - add ?? or something to mark invalid chars ???
|
175
|
+
feed_xml.encode!( Encoding::UTF_8, :invalid => :replace, :undef => :replace )
|
176
|
+
end
|
177
|
+
|
150
178
|
## NB:
|
151
179
|
# for now "hardcoded" to utf8 - what else can we do?
|
152
180
|
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
153
|
-
|
181
|
+
### old "simple" version
|
182
|
+
## feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
|
183
|
+
|
184
|
+
|
154
185
|
logger.debug "feed_xml.encoding.name (after): #{feed_xml.encoding.name}"
|
155
186
|
|
156
187
|
## check for md5 hash for response.body
|
@@ -186,7 +217,7 @@ class Fetcher
|
|
186
217
|
|
187
218
|
### note: might crash w/ encoding errors when saving in postgres
|
188
219
|
## e.g. PG::CharacterNotInRepertoire: ERROR: ...
|
189
|
-
## catch error, log it and
|
220
|
+
## catch error, log it and stop for now
|
190
221
|
#
|
191
222
|
# in the future check for different charset than utf-8 ?? possible?? how to deal with non-utf8 charsets??
|
192
223
|
|
@@ -196,6 +227,7 @@ class Fetcher
|
|
196
227
|
# log db error; and continue
|
197
228
|
puts "*** error: updating feed database record '#{feed_key}' - #{e.to_s}"
|
198
229
|
Activity.create!( text: "*** error: updating feed database record '#{feed_key}' - #{e.to_s}" )
|
230
|
+
return nil # sorry; corrupt feed; parsing not possible; fix char encoding - make it an option in config??
|
199
231
|
end
|
200
232
|
|
201
233
|
|
@@ -75,7 +75,9 @@ class Subscriber
|
|
75
75
|
title: feed_hash[ 'title' ] || feed_hash[ 'name' ],
|
76
76
|
title2: feed_hash[ 'title2' ],
|
77
77
|
includes: feed_hash[ 'includes' ] || feed_hash[ 'include' ],
|
78
|
-
excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ]
|
78
|
+
excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ],
|
79
|
+
## todo/future: add option for adding encoding - might not always be utf8 !!!!
|
80
|
+
## encoding: feed_hash[ 'encoding' ] || feed_hash[ 'charset' ] || 'utf8', ## default to utf8
|
79
81
|
}
|
80
82
|
|
81
83
|
puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."
|
data/lib/pluto/update/version.rb
CHANGED