siefca-httpage 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/lib/httpage/httpage.rb +5 -2
  2. data/lib/httpage.rb +1 -0
  3. metadata +12 -3
@@ -174,16 +174,19 @@ class HTTPage
174
174
  gsub(/<.*?>/m, ''))
175
175
  end
176
176
 
177
- # Transliterates text to ASCII and removes unknown characters.
177
+ # Transliterates text to ASCII and removes unknown characters leaving just words.
178
178
 
179
179
  def clean_text(text=nil, enc=nil)
180
180
  text ||= self.body
181
181
  enc ||= self.encoding
182
182
  page = Iconv.iconv('UTF-8//IGNORE', enc, text).join
183
- page = Iconv.iconv('ASCII//TRANSLIT//IGNORE', 'UTF-8', strip_html(page)).join.downcase
183
+ page = strip_html(page)
184
+ page.gsub!(/['`]/m, '_amp__')
185
+ page = Iconv.iconv('ASCII//TRANSLIT//IGNORE', 'UTF-8', page).join.downcase
184
186
  page.tr!(".!?", ' ')
185
187
  page.gsub!(/[^\x00-\x7F]+/, '')
186
188
  page.gsub!(/[^a-z0-9\-_\+\s\n\.\!\?]+/im, '')
189
+ page.gsub!('_amp__',"'")
187
190
  page.gsub!(%r{[.*?]}mi, '')
188
191
  page.squeeze!(" \n")
189
192
  page.gsub!(/^\s?\n\s?$/m, '')
data/lib/httpage.rb CHANGED
@@ -8,3 +8,4 @@
8
8
 
9
9
  require 'httpage/bufferaffects'
10
10
  require 'httpage/httpage'
11
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: siefca-httpage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - "Pawe\xC5\x82 Wilk"
@@ -11,8 +11,17 @@ cert_chain: []
11
11
 
12
12
  date: 2009-04-22 00:00:00 -07:00
13
13
  default_executable:
14
- dependencies: []
15
-
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: htmlentities
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
16
25
  description: httpage is simple HTTP(S) reader with ability to transliterate body
17
26
  email: pw@gnu.org
18
27
  executables: []