siefca-httpage 0.0.8 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,17 +1,9 @@
1
1
  #!/usr/bin/env ruby
2
2
  # encoding: utf-8
3
3
 
4
- require 'iconv'
5
- require 'htmlentities'
6
- require 'net/http'
7
- require 'net/https'
8
- require 'timeout'
9
- require 'zlib'
10
- require 'uri'
11
-
12
4
  class HTTPage
13
5
 
14
- extend BufferAffects
6
+ include BufferAffects
15
7
 
16
8
  buffers_reset_method :reset_buffers
17
9
  attr_affects_buffers :url, :encoding
@@ -174,7 +166,7 @@ class HTTPage
174
166
  gsub(/<.*?>/m, ''))
175
167
  end
176
168
 
177
- # Transliterates text to ASCII and removes unknown characters leaving just words.
169
+ # Transliterates text to ASCII and removes unknown characters.
178
170
 
179
171
  def clean_text(text=nil, enc=nil)
180
172
  text ||= self.body
@@ -185,9 +177,8 @@ class HTTPage
185
177
  page = Iconv.iconv('ASCII//TRANSLIT//IGNORE', 'UTF-8', page).join.downcase
186
178
  page.tr!(".!?", ' ')
187
179
  page.gsub!(/[^\x00-\x7F]+/, '')
188
- page.gsub!(/[^a-z0-9\-_\+\s\n\.\!\?]+/im, '')
180
+ page.gsub!(/[^a-z0-9\-_\[\]\(\)\*\=\@\#\$\%\^\&\{\}\:\;\,\<\>\+\s\n\.\!\?]+/im, '')
189
181
  page.gsub!('_amp__',"'")
190
- page.gsub!(%r{[.*?]}mi, '')
191
182
  page.squeeze!(" \n")
192
183
  page.gsub!(/^\s?\n\s?$/m, '')
193
184
  page.gsub!(/\n\s/,"\n")
@@ -201,5 +192,13 @@ class HTTPage
201
192
 
202
193
  def clean; clean_text end
203
194
 
195
+ # Transliterates text to ASCII and removes unknown characters leaving just words.
196
+
197
+ def clean_words(text=nil, enc=nil)
198
+ clean_text(text, enc).
199
+ gsub(%r{[.*?]}mi, ' ').
200
+ gsub(/[^a-z0-9]+/im, ' ')
201
+ end
202
+
204
203
  end
205
204
 
data/lib/httpage.rb CHANGED
@@ -6,5 +6,14 @@
6
6
  # Copyright:: Copyright (c) 2009 Paweł Wilk
7
7
  # License:: LGPL
8
8
 
9
- require 'httpage/bufferaffects'
9
+ require 'iconv'
10
+ require 'htmlentities'
11
+ require 'net/http'
12
+ require 'net/https'
13
+ require 'timeout'
14
+ require 'zlib'
15
+ require 'uri'
16
+
17
+ require 'bufferaffects'
10
18
  require 'httpage/httpage'
19
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: siefca-httpage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - "Pawe\xC5\x82 Wilk"
@@ -22,7 +22,17 @@ dependencies:
22
22
  - !ruby/object:Gem::Version
23
23
  version: "0"
24
24
  version:
25
- description: httpage is simple HTTP(S) reader with ability to transliterate body
25
+ - !ruby/object:Gem::Dependency
26
+ name: bufferaffects
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ description: HTTPage is simple HTTP(S) reader with ability to transliterate body
26
36
  email: pw@gnu.org
27
37
  executables: []
28
38
 
@@ -33,7 +43,6 @@ extra_rdoc_files: []
33
43
  files:
34
44
  - lib/httpage.rb
35
45
  - lib/httpage/httpage.rb
36
- - lib/httpage/bufferaffects.rb
37
46
  has_rdoc: true
38
47
  homepage: http://randomseed.pl/httpage
39
48
  post_install_message:
@@ -59,6 +68,6 @@ rubyforge_project:
59
68
  rubygems_version: 1.2.0
60
69
  signing_key:
61
70
  specification_version: 2
62
- summary: httpage is simple HTTP(S) reader with ability to transliterate body
71
+ summary: HTTPage is simple HTTP(S) reader with ability to transliterate body
63
72
  test_files: []
64
73
 
@@ -1,224 +0,0 @@
1
- # = httpage/bufferaffects
2
- #
3
- # Author:: Paweł Wilk (mailto:pw@gnu.org)
4
- # Copyright:: Copyright (c) 2009 Paweł Wilk
5
- # License:: LGPL
6
- #
7
-
8
- # This module is intended to be used as extension
9
- # (class level mixin) for classes using some buffers
10
- # that may be altered by calling certain methods.
11
- #
12
- # It automates resetting of buffers by installing
13
- # wrappers for invasive methods you choose. It rewrites
14
- # selected methods by adding to them code that calls
15
- # buffer(s) flushing method created by you.
16
- #
17
- # === Markers
18
- #
19
- # To select which methods are invasive for your buffer(s)
20
- # you should use markers which in usage are similar to
21
- # accessors, e.g:
22
- #
23
- # attr_affects_buffers :domain
24
- #
25
- # Markers may be placed anywhere in the class. Wrapping
26
- # routine will wait for methods to be defined if you
27
- # mark them too early in your code.
28
- #
29
- # ==== Marking methods
30
- #
31
- # To mark methods which should trigger reset operation
32
- # when called use method_affects_buffers which takes
33
- # comma-separated list of symbols describing names
34
- # of these methods.
35
- #
36
- # ==== Marking attributes (setters)
37
- #
38
- # The marker attr_affects_buffers is similar but it takes
39
- # instance members not methods as arguments. It just installs
40
- # hooks for corresponding setters.
41
- #
42
- # === Buffers flushing method
43
- #
44
- # Default instance method called to reset buffers should be
45
- # defined under name +reset_buffers+
46
- # You may also want to set up your own name by calling
47
- # buffers_reset_method class method. The name of your
48
- # buffers flushing method is passed to subclasses but
49
- # each subclass may redefine it.
50
- #
51
- # Be aware that sub-subclass
52
- # will still need redefinition since it's kind of one-level
53
- # inheritance.
54
- #
55
- # Buffers flushing method may take none or exactly one argument.
56
- # If your method will take an argument then a name of calling
57
- # method will be passed to it as symbol.
58
- #
59
- # === Inherited classes
60
- #
61
- # This module tries to be inheritance-safe but you will have to
62
- # mark methods and members in subclasses if you are going
63
- # to redefine them. The smooth way is of course to use +super+
64
- # in overloaded methods so it will also do the job.
65
- #
66
- # === Caution
67
- #
68
- # This code uses Module#method_added hook. If you're going
69
- # to redefine that method in class using this module remember
70
- # to wrap and call original version or add one line to your
71
- # definition: +ba_check_method(name)+
72
- #
73
- # === Example
74
- #
75
- # class Main
76
- #
77
- # extend BufferAffects
78
- #
79
- # buffers_reset_method :reset_path_buffer
80
- # attr_affects_buffers :subpart
81
- # attr_accessor :subpart, :otherpart
82
- #
83
- # def reset_path_buffer(name)
84
- # @path = nil
85
- # p "reset called for #{name}"
86
- # end
87
- #
88
- # def path
89
- # @path ||= @subpart.to_s + @otherpart.to_s
90
- # end
91
- #
92
- # end
93
- #
94
- # obj = Main.new
95
- # obj.subpart = 'test'
96
- # p obj.path
97
- # obj.subpart = '1234'
98
- # p obj.path
99
-
100
- module BufferAffects
101
-
102
- @@__ba_wrapped__ = {}
103
- @@__ba_reset_m__ = nil
104
-
105
- # This method sets name of method that will be used to reset buffers.
106
-
107
- def buffers_reset_method(name)
108
- name = name.to_s.strip
109
- raise ArgumentError.new('method name cannot be empty') if name.empty?
110
- @__ba_reset_method__ = name.to_sym
111
- @@__ba_reset_m__ ||= @__ba_reset_method__
112
- end
113
- private :buffers_reset_method
114
-
115
- # This method sets the marker for hook to be installed.
116
- # It ignores methods for which wrapper already exists.
117
-
118
- def method_affects_buffers(*names)
119
- @__ba_methods__ ||= {}
120
- names.uniq!
121
- names.collect! { |name| name.to_sym }
122
- names.delete_if { |name| @__ba_methods__.has_key?(name) }
123
- ba_methods_wrap(*names)
124
- end
125
- private :method_affects_buffers
126
-
127
- # This method searches for setter methods for given
128
- # member names and tries to wrap them into buffers
129
- # resetting hooks using method_affects_buffers
130
-
131
- def attr_affects_buffers(*names)
132
- names.collect! { |name| :"#{name}=" }
133
- method_affects_buffers(*names)
134
- end
135
- private :attr_affects_buffers
136
-
137
- # This method installs hook for given methods or puts their names
138
- # on the queue if methods haven't been defined yet. The queue is
139
- # tested each time ba_check_method is called.
140
- #
141
- # Each processed method can be in one of 2 states:
142
- # * false - method is not processed now
143
- # * true - method is now processed
144
- #
145
- # After successful wrapping method name (key) and object ID (value) pairs
146
- # are added to two containers: @@__ba_wrapped__ and @__ba_methods__
147
-
148
- def ba_methods_wrap(*names)
149
- names.delete_if { |name| @__ba_methods__[name] == true } # don't handle methods being processed
150
- kmethods = public_instance_methods +
151
- private_instance_methods +
152
- protected_instance_methods
153
- install_now = names.select { |name| kmethods.include?(name) } # select methods for immediate wrapping
154
- install_now.delete_if do |name| # but don't wrap already wrapped
155
- @@__ba_wrapped__.has_key?(name) && # - wrapped by our class or other class
156
- !@__ba_methods__.has_key?(name) # - not wrapped by our class
157
- end
158
-
159
- install_later = names - install_now # collect undefined and wrapped methods
160
- install_later.each { |name| @__ba_methods__[name] = false } # and add them to the waiting queue
161
-
162
- install_now.each { |name| @__ba_methods__[name] = true } # mark methods as currently processed
163
- installed = ba_install_hook(*install_now) # and install hooks for them
164
- install_now.each { |name| @__ba_methods__[name] = false } # mark methods as not processed again
165
- installed.each_pair do |name,id| # and note the object IDs of wrapped methods
166
- @@__ba_wrapped__[name] = id # shared container
167
- @__ba_methods__[name] = id # this class's container
168
- end
169
- end
170
- private :ba_methods_wrap
171
-
172
- # This method checks whether method which name is given
173
- # is now available and should be installed.
174
-
175
- def ba_check_method(name)
176
- name = name.to_sym
177
- @__ba_methods__ ||= {}
178
- if @__ba_methods__.has_key?(name)
179
- ba_methods_wrap(name)
180
- end
181
- end
182
- private :ba_check_method
183
-
184
- # This method installs hook which alters given methods by wrapping
185
- # them into method that invokes buffers resetting routine. It will
186
- # not install hook for methods beginning with __ba, which signals
187
- # that they are wrappers for other methods.
188
-
189
- def ba_install_hook(*names)
190
- @__ba_reset_method__ ||= @@__ba_reset_m__
191
- @__ba_reset_method__ ||= 'reset_buffers'
192
- installed = {}
193
- names.uniq.each do |name|
194
- new_method = name.to_s
195
- next if new_method[0..3] == '__ba'
196
- orig_id = instance_method(name.to_sym).object_id
197
- orig_method = '__ba' + orig_id.to_s + '__'
198
- reset_method = @__ba_reset_method__.to_s
199
- module_eval %{
200
- alias_method :#{orig_method}, :#{new_method}
201
- private :#{orig_method}
202
- def #{new_method}(*args, &block)
203
- if method(:#{reset_method}).arity == 1
204
- #{reset_method}(:#{new_method})
205
- else
206
- #{reset_method}
207
- end
208
- return #{orig_method}(*args, &block)
209
- end
210
- }
211
- installed[name] = orig_id
212
- end
213
- return installed
214
- end
215
- private :ba_install_hook
216
-
217
- # Hook that intercepts added methods.
218
-
219
- def method_added(name)
220
- ba_check_method(name)
221
- end
222
-
223
- end
224
-