siefca-httpage 0.0.4 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. data/lib/httpage/httpage.rb +5 -2
  2. data/lib/httpage.rb +1 -0
  3. metadata +12 -3
@@ -174,16 +174,19 @@ class HTTPage
174
174
  gsub(/<.*?>/m, ''))
175
175
  end
176
176
 
177
- # Transliterates text to ASCII and removes unknown characters.
177
+ # Transliterates text to ASCII and removes unknown characters leaving just words.
178
178
 
179
179
  def clean_text(text=nil, enc=nil)
180
180
  text ||= self.body
181
181
  enc ||= self.encoding
182
182
  page = Iconv.iconv('UTF-8//IGNORE', enc, text).join
183
- page = Iconv.iconv('ASCII//TRANSLIT//IGNORE', 'UTF-8', strip_html(page)).join.downcase
183
+ page = strip_html(page)
184
+ page.gsub!(/['`]/m, '_amp__')
185
+ page = Iconv.iconv('ASCII//TRANSLIT//IGNORE', 'UTF-8', page).join.downcase
184
186
  page.tr!(".!?", ' ')
185
187
  page.gsub!(/[^\x00-\x7F]+/, '')
186
188
  page.gsub!(/[^a-z0-9\-_\+\s\n\.\!\?]+/im, '')
189
+ page.gsub!('_amp__',"'")
187
190
  page.gsub!(%r{[.*?]}mi, '')
188
191
  page.squeeze!(" \n")
189
192
  page.gsub!(/^\s?\n\s?$/m, '')
data/lib/httpage.rb CHANGED
@@ -8,3 +8,4 @@
8
8
 
9
9
  require 'httpage/bufferaffects'
10
10
  require 'httpage/httpage'
11
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: siefca-httpage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - "Pawe\xC5\x82 Wilk"
@@ -11,8 +11,17 @@ cert_chain: []
11
11
 
12
12
  date: 2009-04-22 00:00:00 -07:00
13
13
  default_executable:
14
- dependencies: []
15
-
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: htmlentities
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
16
25
  description: httpage is simple HTTP(S) reader with ability to transliterate body
17
26
  email: pw@gnu.org
18
27
  executables: []