siefca-httpage 0.0.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,9 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
3
 
4
- require 'iconv'
5
- require 'htmlentities'
6
- require 'net/http'
7
- require 'net/https'
8
- require 'timeout'
9
- require 'zlib'
10
- require 'uri'
11
-
12
4
  class HTTPage
13
5
 
14
- extend BufferAffects
6
+ include BufferAffects
15
7
 
16
8
  buffers_reset_method :reset_buffers
17
9
  attr_affects_buffers :url, :encoding
@@ -174,7 +166,7 @@ class HTTPage
174
166
  gsub(/<.*?>/m, ''))
175
167
  end
176
168
 
177
- # Transliterates text to ASCII and removes unknown characters leaving just words.
169
+ # Transliterates text to ASCII and removes unknown characters.
178
170
 
179
171
  def clean_text(text=nil, enc=nil)
180
172
  text ||= self.body
@@ -185,9 +177,8 @@ class HTTPage
185
177
  page = Iconv.iconv('ASCII//TRANSLIT//IGNORE', 'UTF-8', page).join.downcase
186
178
  page.tr!(".!?", ' ')
187
179
  page.gsub!(/[^\x00-\x7F]+/, '')
188
- page.gsub!(/[^a-z0-9\-_\+\s\n\.\!\?]+/im, '')
180
+ page.gsub!(/[^a-z0-9\-_\[\]\(\)\*\=\@\#\$\%\^\&\{\}\:\;\,\<\>\+\s\n\.\!\?]+/im, '')
189
181
  page.gsub!('_amp__',"'")
190
- page.gsub!(%r{[.*?]}mi, '')
191
182
  page.squeeze!(" \n")
192
183
  page.gsub!(/^\s?\n\s?$/m, '')
193
184
  page.gsub!(/\n\s/,"\n")
@@ -201,5 +192,13 @@ class HTTPage
201
192
 
202
193
  def clean; clean_text end
203
194
 
195
+ # Transliterates text to ASCII and removes unknown characters leaving just words.
196
+
197
+ def clean_words(text=nil, enc=nil)
198
+ clean_text(text, enc).
199
+ gsub(%r{[.*?]}mi, ' ').
200
+ gsub(/[^a-z0-9]+/im, ' ')
201
+ end
202
+
204
203
  end
205
204
 
data/lib/httpage.rb CHANGED
@@ -6,5 +6,14 @@
6
6
  # Copyright:: Copyright (c) 2009 Paweł Wilk
7
7
  # License:: LGPL
8
8
 
9
- require 'httpage/bufferaffects'
9
+ require 'iconv'
10
+ require 'htmlentities'
11
+ require 'net/http'
12
+ require 'net/https'
13
+ require 'timeout'
14
+ require 'zlib'
15
+ require 'uri'
16
+
17
+ require 'bufferaffects'
10
18
  require 'httpage/httpage'
19
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: siefca-httpage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - "Pawe\xC5\x82 Wilk"
@@ -22,7 +22,17 @@ dependencies:
22
22
  - !ruby/object:Gem::Version
23
23
  version: "0"
24
24
  version:
25
- description: httpage is simple HTTP(S) reader with ability to transliterate body
25
+ - !ruby/object:Gem::Dependency
26
+ name: bufferaffects
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ description: HTTPage is simple HTTP(S) reader with ability to transliterate body
26
36
  email: pw@gnu.org
27
37
  executables: []
28
38
 
@@ -33,7 +43,6 @@ extra_rdoc_files: []
33
43
  files:
34
44
  - lib/httpage.rb
35
45
  - lib/httpage/httpage.rb
36
- - lib/httpage/bufferaffects.rb
37
46
  has_rdoc: true
38
47
  homepage: http://randomseed.pl/httpage
39
48
  post_install_message:
@@ -59,6 +68,6 @@ rubyforge_project:
59
68
  rubygems_version: 1.2.0
60
69
  signing_key:
61
70
  specification_version: 2
62
- summary: httpage is simple HTTP(S) reader with ability to transliterate body
71
+ summary: HTTPage is simple HTTP(S) reader with ability to transliterate body
63
72
  test_files: []
64
73
 
@@ -1,224 +0,0 @@
1
- # = httpage/bufferaffects
2
- #
3
- # Author:: Paweł Wilk (mailto:pw@gnu.org)
4
- # Copyright:: Copyright (c) 2009 Paweł Wilk
5
- # License:: LGPL
6
- #
7
-
8
- # This module is intended to be used as extension
9
- # (class level mixin) for classes using some buffers
10
- # that may be altered by calling certain methods.
11
- #
12
- # It automates resetting of buffers by installing
13
- # wrappers for invasive methods you choose. It rewrites
14
- # selected methods by adding to them code that calls
15
- # buffer(s) flushing method created by you.
16
- #
17
- # === Markers
18
- #
19
- # To select which methods are invasive for your buffer(s)
20
- # you should use markers which in usage are similar to
21
- # accessors, e.g:
22
- #
23
- # attr_affects_buffers :domain
24
- #
25
- # Markers may be placed anywhere in the class. Wrapping
26
- # routine will wait for methods to be defined if you
27
- # mark them too early in your code.
28
- #
29
- # ==== Marking methods
30
- #
31
- # To mark methods which should trigger reset operation
32
- # when called use method_affects_buffers which takes
33
- # comma-separated list of symbols describing names
34
- # of these methods.
35
- #
36
- # ==== Marking attributes (setters)
37
- #
38
- # The marker attr_affects_buffers is similar but it takes
39
- # instance members not methods as arguments. It just installs
40
- # hooks for corresponding setters.
41
- #
42
- # === Buffers flushing method
43
- #
44
- # Default instance method called to reset buffers should be
45
- # defined under name +reset_buffers+
46
- # You may also want to set up your own name by calling
47
- # buffers_reset_method class method. The name of your
48
- # buffers flushing method is passed to subclasses but
49
- # each subclass may redefine it.
50
- #
51
- # Be aware that sub-subclass
52
- # will still need redefinition since it's kind of one-level
53
- # inheritance.
54
- #
55
- # Buffers flushing method may take none or exactly one argument.
56
- # If your method will take an argument then a name of calling
57
- # method will be passed to it as symbol.
58
- #
59
- # === Inherited classes
60
- #
61
- # This module tries to be inheritance-safe but you will have to
62
- # mark methods and members in subclasses if you are going
63
- # to redefine them. The smooth way is of course to use +super+
64
- # in overloaded methods so it will also do the job.
65
- #
66
- # === Caution
67
- #
68
- # This code uses Module#method_added hook. If you're going
69
- # to redefine that method in class using this module remember
70
- # to wrap and call original version or add one line to your
71
- # definition: +ba_check_method(name)+
72
- #
73
- # === Example
74
- #
75
- # class Main
76
- #
77
- # extend BufferAffects
78
- #
79
- # buffers_reset_method :reset_path_buffer
80
- # attr_affects_buffers :subpart
81
- # attr_accessor :subpart, :otherpart
82
- #
83
- # def reset_path_buffer(name)
84
- # @path = nil
85
- # p "reset called for #{name}"
86
- # end
87
- #
88
- # def path
89
- # @path ||= @subpart.to_s + @otherpart.to_s
90
- # end
91
- #
92
- # end
93
- #
94
- # obj = Main.new
95
- # obj.subpart = 'test'
96
- # p obj.path
97
- # obj.subpart = '1234'
98
- # p obj.path
99
-
100
- module BufferAffects
101
-
102
- @@__ba_wrapped__ = {}
103
- @@__ba_reset_m__ = nil
104
-
105
- # This method sets name of method that will be used to reset buffers.
106
-
107
- def buffers_reset_method(name)
108
- name = name.to_s.strip
109
- raise ArgumentError.new('method name cannot be empty') if name.empty?
110
- @__ba_reset_method__ = name.to_sym
111
- @@__ba_reset_m__ ||= @__ba_reset_method__
112
- end
113
- private :buffers_reset_method
114
-
115
- # This method sets the marker for hook to be installed.
116
- # It ignores methods for which wrapper already exists.
117
-
118
- def method_affects_buffers(*names)
119
- @__ba_methods__ ||= {}
120
- names.uniq!
121
- names.collect! { |name| name.to_sym }
122
- names.delete_if { |name| @__ba_methods__.has_key?(name) }
123
- ba_methods_wrap(*names)
124
- end
125
- private :method_affects_buffers
126
-
127
- # This method searches for setter methods for given
128
- # member names and tries to wrap them into buffers
129
- # resetting hooks using method_affects_buffers
130
-
131
- def attr_affects_buffers(*names)
132
- names.collect! { |name| :"#{name}=" }
133
- method_affects_buffers(*names)
134
- end
135
- private :attr_affects_buffers
136
-
137
- # This method installs hook for given methods or puts their names
138
- # on the queue if methods haven't been defined yet. The queue is
139
- # tested each time ba_check_method is called.
140
- #
141
- # Each processed method can be in one of 2 states:
142
- # * false - method is not processed now
143
- # * true - method is now processed
144
- #
145
- # After successful wrapping method name (key) and object ID (value) pairs
146
- # are added to two containers: @@__ba_wrapped__ and @__ba_methods__
147
-
148
- def ba_methods_wrap(*names)
149
- names.delete_if { |name| @__ba_methods__[name] == true } # don't handle methods being processed
150
- kmethods = public_instance_methods +
151
- private_instance_methods +
152
- protected_instance_methods
153
- install_now = names.select { |name| kmethods.include?(name) } # select methods for immediate wrapping
154
- install_now.delete_if do |name| # but don't wrap already wrapped
155
- @@__ba_wrapped__.has_key?(name) && # - wrapped by our class or other class
156
- !@__ba_methods__.has_key?(name) # - not wrapped by our class
157
- end
158
-
159
- install_later = names - install_now # collect undefined and wrapped methods
160
- install_later.each { |name| @__ba_methods__[name] = false } # and add them to the waiting queue
161
-
162
- install_now.each { |name| @__ba_methods__[name] = true } # mark methods as currently processed
163
- installed = ba_install_hook(*install_now) # and install hooks for them
164
- install_now.each { |name| @__ba_methods__[name] = false } # mark methods as not processed again
165
- installed.each_pair do |name,id| # and note the object IDs of wrapped methods
166
- @@__ba_wrapped__[name] = id # shared container
167
- @__ba_methods__[name] = id # this class's container
168
- end
169
- end
170
- private :ba_methods_wrap
171
-
172
- # This method checks whether method which name is given
173
- # is now available and should be installed.
174
-
175
- def ba_check_method(name)
176
- name = name.to_sym
177
- @__ba_methods__ ||= {}
178
- if @__ba_methods__.has_key?(name)
179
- ba_methods_wrap(name)
180
- end
181
- end
182
- private :ba_check_method
183
-
184
- # This method installs hook which alters given methods by wrapping
185
- # them into method that invokes buffers resetting routine. It will
186
- # not install hook for methods beginning with __ba, which signals
187
- # that they are wrappers for other methods.
188
-
189
- def ba_install_hook(*names)
190
- @__ba_reset_method__ ||= @@__ba_reset_m__
191
- @__ba_reset_method__ ||= 'reset_buffers'
192
- installed = {}
193
- names.uniq.each do |name|
194
- new_method = name.to_s
195
- next if new_method[0..3] == '__ba'
196
- orig_id = instance_method(name.to_sym).object_id
197
- orig_method = '__ba' + orig_id.to_s + '__'
198
- reset_method = @__ba_reset_method__.to_s
199
- module_eval %{
200
- alias_method :#{orig_method}, :#{new_method}
201
- private :#{orig_method}
202
- def #{new_method}(*args, &block)
203
- if method(:#{reset_method}).arity == 1
204
- #{reset_method}(:#{new_method})
205
- else
206
- #{reset_method}
207
- end
208
- return #{orig_method}(*args, &block)
209
- end
210
- }
211
- installed[name] = orig_id
212
- end
213
- return installed
214
- end
215
- private :ba_install_hook
216
-
217
- # Hook that intercepts added methods.
218
-
219
- def method_added(name)
220
- ba_check_method(name)
221
- end
222
-
223
- end
224
-