pluto-update 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/pluto/update/fetcher.rb +34 -2
- data/lib/pluto/update/subscriber.rb +3 -1
- data/lib/pluto/update/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 39836180edc940c1dd06d5161c18d7991c551715
|
4
|
+
data.tar.gz: 74c2f94f4da7e426bdc7c806672f9625dc35d081
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: adf0ec25e897b3eb3d0f1f44fe9178b4303eec1b535bf9ac98390002e87f3cc86e0208b3284a43215842053fdcc5db5ebe5eb99417badb0085aa806e0ee1a7ba
|
7
|
+
data.tar.gz: 7b8035080c0d059e14c455a4c9591c50b5966904429e9149e2c35512e671d869b15c6322cad6321dbf3293cd89d2c10e47cd652da156f0544b09d111d2e5a48e
|
data/lib/pluto/update/fetcher.rb
CHANGED
@@ -144,13 +144,44 @@ class Fetcher
|
|
144
144
|
# - try to change encoding to UTF-8 ourselves
|
145
145
|
logger.debug "feed_xml.encoding.name (before): #{feed_xml.encoding.name}"
|
146
146
|
|
147
|
+
|
147
148
|
#####
|
148
149
|
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
149
150
|
|
151
|
+
# try Converting ASCII-8BIT to UTF-8 based on domain-specific guesses
|
152
|
+
begin
|
153
|
+
# Try it as UTF-8 directly
|
154
|
+
# Note: make a copy/dup - otherwise convert fails (because string is already changed/corrupted)
|
155
|
+
feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::UTF_8 )
|
156
|
+
unless feed_xml_cleaned.valid_encoding?
|
157
|
+
|
158
|
+
puts "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1"
|
159
|
+
Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding not valid utf8 - trying latin1" )
|
160
|
+
# Some of it might be old Windows code page
|
161
|
+
# -- (Windows Code Page CP1252 is ISO_8859_1 is Latin-1 - check ??)
|
162
|
+
|
163
|
+
# tell ruby the encoding
|
164
|
+
# encode to utf-8
|
165
|
+
## use all in code encode ?? e.g. feed_xml_cleaned = feed_xml.encode( Encoding::UTF_8, Encoding::ISO_8859_1 )
|
166
|
+
feed_xml_cleaned = feed_xml.dup.force_encoding( Encoding::ISO_8859_1 ).encode( Encoding::UTF_8 )
|
167
|
+
end
|
168
|
+
feed_xml = feed_xml_cleaned
|
169
|
+
rescue EncodingError => e
|
170
|
+
puts "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}"
|
171
|
+
Activity.create!( text: "*** warn: feed '#{feed_key}' charset encoding to utf8 failed; throwing out invalid bits - #{e.to_s}" )
|
172
|
+
|
173
|
+
# Force it to UTF-8, throwing out invalid bits
|
174
|
+
## todo: check options - add ?? or something to mark invalid chars ???
|
175
|
+
feed_xml.encode!( Encoding::UTF_8, :invalid => :replace, :undef => :replace )
|
176
|
+
end
|
177
|
+
|
150
178
|
## NB:
|
151
179
|
# for now "hardcoded" to utf8 - what else can we do?
|
152
180
|
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
153
|
-
|
181
|
+
### old "simple" version
|
182
|
+
## feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
|
183
|
+
|
184
|
+
|
154
185
|
logger.debug "feed_xml.encoding.name (after): #{feed_xml.encoding.name}"
|
155
186
|
|
156
187
|
## check for md5 hash for response.body
|
@@ -186,7 +217,7 @@ class Fetcher
|
|
186
217
|
|
187
218
|
### note: might crash w/ encoding errors when saving in postgres
|
188
219
|
## e.g. PG::CharacterNotInRepertoire: ERROR: ...
|
189
|
-
## catch error, log it and
|
220
|
+
## catch error, log it and stop for now
|
190
221
|
#
|
191
222
|
# in the future check for different charset than utf-8 ?? possible?? how to deal with non-utf8 charsets??
|
192
223
|
|
@@ -196,6 +227,7 @@ class Fetcher
|
|
196
227
|
# log db error; and continue
|
197
228
|
puts "*** error: updating feed database record '#{feed_key}' - #{e.to_s}"
|
198
229
|
Activity.create!( text: "*** error: updating feed database record '#{feed_key}' - #{e.to_s}" )
|
230
|
+
return nil # sorry; corrupt feed; parsing not possible; fix char encoding - make it an option in config??
|
199
231
|
end
|
200
232
|
|
201
233
|
|
data/lib/pluto/update/subscriber.rb
CHANGED
@@ -75,7 +75,9 @@ class Subscriber
|
|
75
75
|
title: feed_hash[ 'title' ] || feed_hash[ 'name' ],
|
76
76
|
title2: feed_hash[ 'title2' ],
|
77
77
|
includes: feed_hash[ 'includes' ] || feed_hash[ 'include' ],
|
78
|
-
excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ]
|
78
|
+
excludes: feed_hash[ 'excludes' ] || feed_hash[ 'exclude' ],
|
79
|
+
## todo/future: add option for adding encoding - might not always be utf8 !!!!
|
80
|
+
## encoding: feed_hash[ 'encoding' ] || feed_hash[ 'charset' ] || 'utf8', ## default to utf8
|
79
81
|
}
|
80
82
|
|
81
83
|
puts "Updating feed subscription >#{feed_key}< - >#{feed_attribs[:feed_url]}<..."
|
data/lib/pluto/update/version.rb
CHANGED