nov-ruby-openid 2.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (203) hide show
  1. data/CHANGELOG +215 -0
  2. data/CHANGES-2.1.0 +36 -0
  3. data/INSTALL +47 -0
  4. data/LICENSE +210 -0
  5. data/NOTICE +2 -0
  6. data/README +81 -0
  7. data/Rakefile +98 -0
  8. data/UPGRADE +127 -0
  9. data/VERSION +1 -0
  10. data/contrib/google/ruby-openid-apps-discovery-1.0.gem +0 -0
  11. data/contrib/google/ruby-openid-apps-discovery-1.01.gem +0 -0
  12. data/examples/README +32 -0
  13. data/examples/active_record_openid_store/README +58 -0
  14. data/examples/active_record_openid_store/XXX_add_open_id_store_to_db.rb +24 -0
  15. data/examples/active_record_openid_store/XXX_upgrade_open_id_store.rb +26 -0
  16. data/examples/active_record_openid_store/init.rb +8 -0
  17. data/examples/active_record_openid_store/lib/association.rb +10 -0
  18. data/examples/active_record_openid_store/lib/nonce.rb +3 -0
  19. data/examples/active_record_openid_store/lib/open_id_setting.rb +4 -0
  20. data/examples/active_record_openid_store/lib/openid_ar_store.rb +57 -0
  21. data/examples/active_record_openid_store/test/store_test.rb +212 -0
  22. data/examples/discover +49 -0
  23. data/examples/rails_openid/README +153 -0
  24. data/examples/rails_openid/Rakefile +10 -0
  25. data/examples/rails_openid/app/controllers/application.rb +4 -0
  26. data/examples/rails_openid/app/controllers/consumer_controller.rb +122 -0
  27. data/examples/rails_openid/app/controllers/login_controller.rb +45 -0
  28. data/examples/rails_openid/app/controllers/server_controller.rb +265 -0
  29. data/examples/rails_openid/app/helpers/application_helper.rb +3 -0
  30. data/examples/rails_openid/app/helpers/login_helper.rb +2 -0
  31. data/examples/rails_openid/app/helpers/server_helper.rb +9 -0
  32. data/examples/rails_openid/app/views/consumer/index.rhtml +81 -0
  33. data/examples/rails_openid/app/views/layouts/server.rhtml +68 -0
  34. data/examples/rails_openid/app/views/login/index.rhtml +56 -0
  35. data/examples/rails_openid/app/views/server/decide.rhtml +26 -0
  36. data/examples/rails_openid/config/boot.rb +19 -0
  37. data/examples/rails_openid/config/database.yml +74 -0
  38. data/examples/rails_openid/config/environment.rb +54 -0
  39. data/examples/rails_openid/config/environments/development.rb +19 -0
  40. data/examples/rails_openid/config/environments/production.rb +19 -0
  41. data/examples/rails_openid/config/environments/test.rb +19 -0
  42. data/examples/rails_openid/config/routes.rb +24 -0
  43. data/examples/rails_openid/doc/README_FOR_APP +2 -0
  44. data/examples/rails_openid/public/.htaccess +40 -0
  45. data/examples/rails_openid/public/404.html +8 -0
  46. data/examples/rails_openid/public/500.html +8 -0
  47. data/examples/rails_openid/public/dispatch.cgi +12 -0
  48. data/examples/rails_openid/public/dispatch.fcgi +26 -0
  49. data/examples/rails_openid/public/dispatch.rb +12 -0
  50. data/examples/rails_openid/public/favicon.ico +0 -0
  51. data/examples/rails_openid/public/images/openid_login_bg.gif +0 -0
  52. data/examples/rails_openid/public/javascripts/controls.js +750 -0
  53. data/examples/rails_openid/public/javascripts/dragdrop.js +584 -0
  54. data/examples/rails_openid/public/javascripts/effects.js +854 -0
  55. data/examples/rails_openid/public/javascripts/prototype.js +1785 -0
  56. data/examples/rails_openid/public/robots.txt +1 -0
  57. data/examples/rails_openid/script/about +3 -0
  58. data/examples/rails_openid/script/breakpointer +3 -0
  59. data/examples/rails_openid/script/console +3 -0
  60. data/examples/rails_openid/script/destroy +3 -0
  61. data/examples/rails_openid/script/generate +3 -0
  62. data/examples/rails_openid/script/performance/benchmarker +3 -0
  63. data/examples/rails_openid/script/performance/profiler +3 -0
  64. data/examples/rails_openid/script/plugin +3 -0
  65. data/examples/rails_openid/script/process/reaper +3 -0
  66. data/examples/rails_openid/script/process/spawner +3 -0
  67. data/examples/rails_openid/script/process/spinner +3 -0
  68. data/examples/rails_openid/script/runner +3 -0
  69. data/examples/rails_openid/script/server +3 -0
  70. data/examples/rails_openid/test/functional/login_controller_test.rb +18 -0
  71. data/examples/rails_openid/test/functional/server_controller_test.rb +18 -0
  72. data/examples/rails_openid/test/test_helper.rb +28 -0
  73. data/lib/hmac/hmac.rb +112 -0
  74. data/lib/hmac/sha1.rb +11 -0
  75. data/lib/hmac/sha2.rb +25 -0
  76. data/lib/openid.rb +20 -0
  77. data/lib/openid/association.rb +249 -0
  78. data/lib/openid/consumer.rb +395 -0
  79. data/lib/openid/consumer/associationmanager.rb +344 -0
  80. data/lib/openid/consumer/checkid_request.rb +186 -0
  81. data/lib/openid/consumer/discovery.rb +497 -0
  82. data/lib/openid/consumer/discovery_manager.rb +123 -0
  83. data/lib/openid/consumer/html_parse.rb +134 -0
  84. data/lib/openid/consumer/idres.rb +523 -0
  85. data/lib/openid/consumer/responses.rb +148 -0
  86. data/lib/openid/cryptutil.rb +115 -0
  87. data/lib/openid/dh.rb +89 -0
  88. data/lib/openid/extension.rb +39 -0
  89. data/lib/openid/extensions/ax.rb +539 -0
  90. data/lib/openid/extensions/oauth.rb +91 -0
  91. data/lib/openid/extensions/pape.rb +179 -0
  92. data/lib/openid/extensions/sreg.rb +277 -0
  93. data/lib/openid/extensions/ui.rb +53 -0
  94. data/lib/openid/extras.rb +11 -0
  95. data/lib/openid/fetchers.rb +258 -0
  96. data/lib/openid/kvform.rb +136 -0
  97. data/lib/openid/kvpost.rb +58 -0
  98. data/lib/openid/message.rb +553 -0
  99. data/lib/openid/protocolerror.rb +8 -0
  100. data/lib/openid/server.rb +1544 -0
  101. data/lib/openid/store/filesystem.rb +271 -0
  102. data/lib/openid/store/interface.rb +75 -0
  103. data/lib/openid/store/memcache.rb +107 -0
  104. data/lib/openid/store/memory.rb +84 -0
  105. data/lib/openid/store/nonce.rb +68 -0
  106. data/lib/openid/trustroot.rb +349 -0
  107. data/lib/openid/urinorm.rb +75 -0
  108. data/lib/openid/util.rb +110 -0
  109. data/lib/openid/yadis/accept.rb +148 -0
  110. data/lib/openid/yadis/constants.rb +21 -0
  111. data/lib/openid/yadis/discovery.rb +153 -0
  112. data/lib/openid/yadis/filters.rb +205 -0
  113. data/lib/openid/yadis/htmltokenizer.rb +305 -0
  114. data/lib/openid/yadis/parsehtml.rb +45 -0
  115. data/lib/openid/yadis/services.rb +42 -0
  116. data/lib/openid/yadis/xrds.rb +155 -0
  117. data/lib/openid/yadis/xri.rb +90 -0
  118. data/lib/openid/yadis/xrires.rb +99 -0
  119. data/setup.rb +1551 -0
  120. data/test/data/accept.txt +124 -0
  121. data/test/data/dh.txt +29 -0
  122. data/test/data/example-xrds.xml +14 -0
  123. data/test/data/linkparse.txt +587 -0
  124. data/test/data/n2b64 +650 -0
  125. data/test/data/test1-discover.txt +137 -0
  126. data/test/data/test1-parsehtml.txt +152 -0
  127. data/test/data/test_discover/malformed_meta_tag.html +19 -0
  128. data/test/data/test_discover/openid.html +11 -0
  129. data/test/data/test_discover/openid2.html +11 -0
  130. data/test/data/test_discover/openid2_xrds.xml +12 -0
  131. data/test/data/test_discover/openid2_xrds_no_local_id.xml +11 -0
  132. data/test/data/test_discover/openid_1_and_2.html +11 -0
  133. data/test/data/test_discover/openid_1_and_2_xrds.xml +16 -0
  134. data/test/data/test_discover/openid_1_and_2_xrds_bad_delegate.xml +17 -0
  135. data/test/data/test_discover/openid_and_yadis.html +12 -0
  136. data/test/data/test_discover/openid_no_delegate.html +10 -0
  137. data/test/data/test_discover/openid_utf8.html +11 -0
  138. data/test/data/test_discover/yadis_0entries.xml +12 -0
  139. data/test/data/test_discover/yadis_2_bad_local_id.xml +15 -0
  140. data/test/data/test_discover/yadis_2entries_delegate.xml +22 -0
  141. data/test/data/test_discover/yadis_2entries_idp.xml +21 -0
  142. data/test/data/test_discover/yadis_another_delegate.xml +14 -0
  143. data/test/data/test_discover/yadis_idp.xml +12 -0
  144. data/test/data/test_discover/yadis_idp_delegate.xml +13 -0
  145. data/test/data/test_discover/yadis_no_delegate.xml +11 -0
  146. data/test/data/test_xrds/=j3h.2007.11.14.xrds +25 -0
  147. data/test/data/test_xrds/README +12 -0
  148. data/test/data/test_xrds/delegated-20060809-r1.xrds +34 -0
  149. data/test/data/test_xrds/delegated-20060809-r2.xrds +34 -0
  150. data/test/data/test_xrds/delegated-20060809.xrds +34 -0
  151. data/test/data/test_xrds/no-xrd.xml +7 -0
  152. data/test/data/test_xrds/not-xrds.xml +2 -0
  153. data/test/data/test_xrds/prefixsometimes.xrds +34 -0
  154. data/test/data/test_xrds/ref.xrds +109 -0
  155. data/test/data/test_xrds/sometimesprefix.xrds +34 -0
  156. data/test/data/test_xrds/spoof1.xrds +25 -0
  157. data/test/data/test_xrds/spoof2.xrds +25 -0
  158. data/test/data/test_xrds/spoof3.xrds +37 -0
  159. data/test/data/test_xrds/status222.xrds +9 -0
  160. data/test/data/test_xrds/subsegments.xrds +58 -0
  161. data/test/data/test_xrds/valid-populated-xrds.xml +39 -0
  162. data/test/data/trustroot.txt +153 -0
  163. data/test/data/urinorm.txt +79 -0
  164. data/test/discoverdata.rb +131 -0
  165. data/test/test_accept.rb +170 -0
  166. data/test/test_association.rb +266 -0
  167. data/test/test_associationmanager.rb +917 -0
  168. data/test/test_ax.rb +690 -0
  169. data/test/test_checkid_request.rb +294 -0
  170. data/test/test_consumer.rb +257 -0
  171. data/test/test_cryptutil.rb +119 -0
  172. data/test/test_dh.rb +86 -0
  173. data/test/test_discover.rb +852 -0
  174. data/test/test_discovery_manager.rb +262 -0
  175. data/test/test_extension.rb +46 -0
  176. data/test/test_extras.rb +35 -0
  177. data/test/test_fetchers.rb +565 -0
  178. data/test/test_filters.rb +270 -0
  179. data/test/test_idres.rb +963 -0
  180. data/test/test_kvform.rb +165 -0
  181. data/test/test_kvpost.rb +65 -0
  182. data/test/test_linkparse.rb +101 -0
  183. data/test/test_message.rb +1116 -0
  184. data/test/test_nonce.rb +89 -0
  185. data/test/test_oauth.rb +175 -0
  186. data/test/test_openid_yadis.rb +178 -0
  187. data/test/test_pape.rb +247 -0
  188. data/test/test_parsehtml.rb +80 -0
  189. data/test/test_responses.rb +63 -0
  190. data/test/test_server.rb +2457 -0
  191. data/test/test_sreg.rb +479 -0
  192. data/test/test_stores.rb +298 -0
  193. data/test/test_trustroot.rb +113 -0
  194. data/test/test_ui.rb +93 -0
  195. data/test/test_urinorm.rb +35 -0
  196. data/test/test_util.rb +145 -0
  197. data/test/test_xrds.rb +169 -0
  198. data/test/test_xri.rb +48 -0
  199. data/test/test_xrires.rb +63 -0
  200. data/test/test_yadis_discovery.rb +220 -0
  201. data/test/testutil.rb +127 -0
  202. data/test/util.rb +53 -0
  203. metadata +336 -0
@@ -0,0 +1,205 @@
1
+ # This file contains functions and classes used for extracting
2
+ # endpoint information out of a Yadis XRD file using the REXML
3
+ # XML parser.
4
+
5
+ #
6
+ module OpenID
7
+ module Yadis
8
class BasicServiceEndpoint
  attr_reader :type_uris, :yadis_url, :uri, :service_element

  # Generic endpoint object holding the parsed service information
  # together with a reference to the service element it was built
  # from. When an xrd:Service carries more than one xrd:Type or
  # xrd:URI, one instance stands for just one of those pairs.
  #
  # Because it responds to from_basic_service_endpoint, this object
  # (and the class itself) can be used as a filter. The simplest
  # filter you can write implements from_basic_service_endpoint,
  # which receives one of these objects.
  def initialize(yadis_url, type_uris, uri, service_element)
    @yadis_url, @type_uris = yadis_url, type_uris
    @uri, @service_element = uri, service_element
  end

  # Returns the type URIs of this endpoint that also appear in the
  # given list (the result of @type_uris & type_uris). Useful for
  # endpoint classes that need to check for several versions of a
  # single protocol.
  def match_types(type_uris)
    @type_uris & type_uris
  end

  class << self
    # Identity transform: hands the basic endpoint back unchanged.
    # It exists so that BasicServiceEndpoint can itself act as a
    # filter. Subclasses should re-implement this method.
    def from_basic_service_endpoint(endpoint)
      endpoint
    end
  end

  # Instance-level twin of the class method above, so that both the
  # class and its instances respond to this message. Delegates to
  # the class-level implementation.
  def from_basic_service_endpoint(endpoint)
    self.class.from_basic_service_endpoint(endpoint)
  end
end
53
+
54
+ # Takes a list of basic filters and makes a filter that
55
+ # transforms the basic filter into a top-level filter. This is
56
+ # mostly useful for the implementation of make_filter, which
57
+ # should only be needed for special cases or internal use by
58
+ # this library.
59
+ #
60
+ # This object is useful for creating simple filters for services
61
+ # that use one URI and are specified by one Type (we expect most
62
+ # Types will fit this paradigm).
63
+ #
64
+ # Creates a BasicServiceEndpoint object and applies the filter
65
+ # functions to it until one of them returns a value.
66
class TransformFilterMaker
  attr_reader :filter_procs

  # Store the endpoint transformers.
  #
  # filter_procs are callables applied to a basic endpoint, in
  # order, until one of them returns something other than nil; that
  # value is the result of the transformation.
  def initialize(filter_procs)
    @filter_procs = filter_procs
  end

  # Expand the service element by xrd:Type and xrd:URI, wrap every
  # combination in a BasicServiceEndpoint and run the filter procs
  # over it. Returns an array of the non-nil results.
  def get_service_endpoints(yadis_url, service_element)
    matched = []

    Yadis::expand_service(service_element).each do |type_uris, uri, _|
      # One basic endpoint per yadis_url / Service / Type / URI
      # combination.
      basic = BasicServiceEndpoint.new(
        yadis_url, type_uris, uri, service_element)

      transformed = apply_filters(basic)
      matched << transformed unless transformed.nil?
    end

    matched
  end

  # Run the filter procs over the endpoint and return the first
  # non-nil result; later procs are not called. Returns nil when
  # every proc returned nil.
  def apply_filters(endpoint)
    @filter_procs.each do |filter_proc|
      transformed = filter_proc.call(endpoint)
      return transformed unless transformed.nil?
    end

    nil
  end
end
115
+
116
class CompoundFilter
  attr_reader :subfilters

  # Build a filter that runs a set of filters over an endpoint and
  # gathers everything they produce.
  def initialize(subfilters)
    @subfilters = subfilters
  end

  # Ask every subfilter for its endpoint objects and return them
  # concatenated, in subfilter order.
  def get_service_endpoints(yadis_url, service_element)
    @subfilters.inject([]) do |collected, subfilter|
      collected + subfilter.get_service_endpoints(yadis_url, service_element)
    end
  end
end
135
+
136
+ # TypeError describing a failed conversion into a filter; its message
137
+ # lists the kinds of parts that are accepted.
138
+ @@filter_type_error = TypeError.new(
139
+ 'Expected a filter, an endpoint, a callable or a list of any of these.')
140
+
141
# Convert a filter-convertible thing into a filter.
#
# parts may be a filter, an endpoint, a callable, or an Array of
# any of these. nil stands for [BasicServiceEndpoint]. Anything that
# is not already an Array is wrapped in one before being handed to
# mk_compound_filter, whose result is returned.
def self.make_filter(parts)
  parts = [BasicServiceEndpoint] if parts.nil?
  parts = [parts] unless parts.is_a?(Array)
  mk_compound_filter(parts)
end
157
+
158
# Create a filter out of a list of filter-like things.
#
# Used by make_filter.
#
# parts:: an object responding to each whose elements can each be
#         passed to make_filter: a full filter (responds to
#         get_service_endpoints), an endpoint (responds to
#         from_basic_service_endpoint), a callable (responds to
#         call), or a nested Array of these.
#
# Returns the single resulting filter when there is exactly one,
# otherwise a CompoundFilter wrapping all of them. All endpoint
# transformers found at this level are gathered into one
# TransformFilterMaker, which is appended after the full filters.
#
# Raises TypeError when parts is not iterable or when an element is
# none of the accepted kinds.
def self.mk_compound_filter(parts)
  unless parts.respond_to?('each')
    raise TypeError, "#{parts.inspect} is not iterable"
  end

  # Separate into a list of callables and a list of filter objects
  transformers = []
  filters = []
  parts.each { |subfilter|
    if subfilter.is_a?(Array)
      # A nested list becomes a filter of its own
      filters << mk_compound_filter(subfilter)
    elsif subfilter.respond_to?('get_service_endpoints')
      # It's a full filter
      filters << subfilter
    elsif subfilter.respond_to?('from_basic_service_endpoint')
      # It's an endpoint object, so put its endpoint conversion
      # method into the list of endpoint transformers
      transformers << subfilter.method('from_basic_service_endpoint')
    elsif subfilter.respond_to?('call')
      # It's a proc, so add it to the list of endpoint transformers
      transformers << subfilter
    else
      # Raise a fresh exception for every failure instead of
      # re-raising one shared, pre-built instance, so each error
      # carries the backtrace of the call that actually failed. The
      # text matches the message of @@filter_type_error.
      raise TypeError,
            'Expected a filter, an endpoint, a callable or a list of any of these.'
    end
  }

  filters << TransformFilterMaker.new(transformers) unless transformers.empty?

  filters.length == 1 ? filters[0] : CompoundFilter.new(filters)
end
204
+ end
205
+ end
@@ -0,0 +1,305 @@
1
+ # = HTMLTokenizer
2
+ #
3
+ # Author:: Ben Giddings (mailto:bg-rubyforge@infofiend.com)
4
+ # Copyright:: Copyright (c) 2004 Ben Giddings
5
+ # License:: Distributed under the same terms as Ruby
6
+ #
7
+ #
8
+ # This is a partial port of the functionality behind Perl's TokeParser
9
+ # Provided a page it progressively returns tokens from that page
10
+ #
11
+ # $Id: htmltokenizer.rb,v 1.7 2005/06/07 21:05:53 merc Exp $
12
+
13
+ #
14
+ # A class to tokenize HTML.
15
+ #
16
+ # Example:
17
+ #
18
+ # page = "<HTML>
19
+ # <HEAD>
20
+ # <TITLE>This is the title</TITLE>
21
+ # </HEAD>
22
+ # <!-- Here comes the <a href=\"missing.link\">blah</a>
23
+ # comment body
24
+ # -->
25
+ # <BODY>
26
+ # <H1>This is the header</H1>
27
+ # <P>
28
+ # This is the paragraph, it contains
29
+ # <a href=\"link.html\">links</a>,
30
+ # <img src=\"blah.gif\" optional alt='images
31
+ # are
32
+ # really cool'>. Ok, here is some more text and
33
+ # <A href=\"http://another.link.com/\" target=\"_blank\">another link</A>.
34
+ # </P>
35
+ # </body>
36
+ # </HTML>
37
+ # "
38
+ # toke = HTMLTokenizer.new(page)
39
+ #
40
+ # assert("<h1>" == toke.getTag("h1", "h2", "h3").to_s.downcase)
41
+ # assert(HTMLTag.new("<a href=\"link.html\">") == toke.getTag("IMG", "A"))
42
+ # assert("links" == toke.getTrimmedText)
43
+ # assert(toke.getTag("IMG", "A").attr_hash['optional'])
44
+ # assert("_blank" == toke.getTag("IMG", "A").attr_hash['target'])
45
+ #
46
class HTMLTokenizer
  @@version = 1.0

  # Get version of HTMLTokenizer lib
  def self.version
    @@version
  end

  attr_reader :page

  # Create a new tokenizer, based on the content, used as a string.
  def initialize(content)
    @page = content.to_s
    @cur_pos = 0
  end

  # Reset the parser, setting the current position back at the start
  def reset
    @cur_pos = 0
  end

  # Look at the next token, but don't actually grab it.
  #
  # Returns an HTMLComment, HTMLTag or HTMLText, or nil when the
  # whole page has been consumed. Raises HTMLTokenizerError when a
  # comment or tag is opened but never closed.
  def peekNextToken
    return nil if @cur_pos == @page.length

    if ?< == @page[@cur_pos]
      # Next token is a tag of some kind
      if '!--' == @page[(@cur_pos + 1), 3]
        # Token is a comment
        tag_end = @page.index('-->', (@cur_pos + 1))
        if tag_end.nil?
          raise HTMLTokenizerError, "No end found to started comment:\n#{@page[@cur_pos,80]}"
        end
        # tag_end points at the first '-' of '-->', so include all
        # three characters of the terminator
        HTMLComment.new(@page[@cur_pos .. (tag_end + 2)])
      else
        # Token is a html tag
        tag_end = @page.index('>', (@cur_pos + 1))
        if tag_end.nil?
          raise HTMLTokenizerError, "No end found to started tag:\n#{@page[@cur_pos,80]}"
        end
        HTMLTag.new(@page[@cur_pos .. tag_end])
      end
    else
      # Next token is text: everything up to (not including) the
      # next '<', or the rest of the page when there is none
      text_end = @page.index('<', @cur_pos)
      text_end = text_end.nil? ? -1 : (text_end - 1)
      HTMLText.new(@page[@cur_pos .. text_end])
    end
  end

  # Get the next token and advance past it. Returns an instance of
  # * HTMLText
  # * HTMLComment
  # * HTMLTag
  # or nil when there are no tokens left.
  def getNextToken
    token = peekNextToken
    @cur_pos += token.raw.length if token
    token
  end

  # Get a tag from the specified set of desired tags.
  # For example:
  # <tt>foo = toke.getTag("h1", "h2", "h3")</tt>
  # Will return the next header tag encountered.
  #
  # Tag names are matched case-insensitively. With no arguments the
  # next tag of any kind is returned. Returns nil when the page ends
  # before a matching tag is found.
  def getTag(*sought_tags)
    sought_tags.collect! {|elm| elm.downcase}

    while (tag = getNextToken)
      if tag.kind_of?(HTMLTag) and
          (0 == sought_tags.length or sought_tags.include?(tag.tag_name))
        break
      end
    end
    tag
  end

  # Get all the text between the current position and the next tag
  # (if no tag is specified) or a specific later tag.
  #
  # Without until_tag: returns the text of the next token when it is
  # text, or "" when the next token is a tag or comment (nothing is
  # consumed) or when the end of the page has been reached.
  #
  # With until_tag: consumes tokens up to, but not including, the
  # first tag with that name (or to the end of the page) and returns
  # the non-empty token texts, each followed by a single space. The
  # name is matched case-insensitively, as in getTag.
  def getText(until_tag = nil)
    if until_tag.nil?
      # At the end of the page there is no token to ask for text, so
      # answer "" instead of calling #text on nil.
      if @cur_pos == @page.length or ?< == @page[@cur_pos]
        ""
      else
        # Next token is text
        getNextToken.text
      end
    else
      # HTMLTag#tag_name is always lower case, so normalize the
      # requested name before comparing.
      wanted = until_tag.to_s.downcase
      ret_str = ""

      while (tag = peekNextToken)
        if tag.kind_of?(HTMLTag) and tag.tag_name == wanted
          break
        end

        if ("" != tag.text)
          ret_str << (tag.text + " ")
        end
        getNextToken
      end

      ret_str
    end
  end

  # Like getText, but squeeze all whitespace, getting rid of
  # leading and trailing whitespace, and squeezing multiple
  # spaces into a single space.
  def getTrimmedText(until_tag = nil)
    getText(until_tag).strip.gsub(/\s+/m, " ")
  end

end
168
+
169
# Raised by the tokenizer and token classes when the page or a token's
# raw text is malformed. Inherits from StandardError so that a plain
# `rescue` (which does not catch direct Exception subclasses) handles
# it; `rescue HTMLTokenizerError` and `rescue Exception` keep working.
class HTMLTokenizerError < StandardError
end
171
+
172
+ # The parent class for all three types of HTML tokens
173
class HTMLToken
  attr_accessor :raw

  # Remember the raw source text this token was built from.
  def initialize(text)
    @raw = text
  end

  # The string form of a token is its raw source text.
  def to_s
    raw
  end

  # The base token has no text representation; subclasses override.
  def text
    ""
  end

  # The token's text with surrounding whitespace removed and every
  # inner run of whitespace collapsed to one space.
  def trimmed_text
    stripped = text.strip
    stripped.gsub(/\s+/m, " ")
  end

  # Tokens compare equal to anything whose to_s equals their raw
  # source text.
  def ==(other)
    other_source = other.to_s
    raw == other_source
  end
end
200
+
201
+ # Class representing text that isn't inside a tag
202
class HTMLText < HTMLToken
  # For plain text, the text representation is the raw source itself.
  def text
    self.raw
  end
end
207
+
208
+ # Class representing an HTML comment
209
class HTMLComment < HTMLToken
  attr_accessor :contents

  # Build a comment token from its raw "<!-- ... -->" source.
  # contents is set to what lies between the delimiters, without the
  # surrounding whitespace. Raises HTMLTokenizerError when the text
  # does not have the shape of a comment.
  def initialize(text)
    super(text)
    found = text.match(/^<!--\s*(.*?)\s*-->$/m)
    unless found
      raise HTMLTokenizerError, "Text passed to HTMLComment.initialize is not a comment"
    end

    @contents = found[1]
  end
end
221
+
222
+ # Class representing an HTML tag
223
class HTMLTag < HTMLToken
  attr_reader :end_tag, :tag_name

  # Build a tag token from its raw "<...>" source.
  #
  # tag_name is the lower-cased tag name, prefixed with '/' for end
  # tags; end_tag tells whether this is an end tag. Attributes are
  # not parsed here, see attr_hash.
  #
  # Raises HTMLTokenizerError when the text is not delimited by
  # '<' and '>' or contains no tag name.
  def initialize(text)
    super(text)
    if ?< != text[0] or ?> != text[-1]
      raise HTMLTokenizerError, "Text passed to HTMLTag.initialize is not a tag"
    end

    @attr_hash = Hash.new

    tag_name = text.scan(/[\w:-]+/)[0]
    if tag_name.nil?
      # Report the offending source; the name itself is nil here.
      raise HTMLTokenizerError, "Error, tag is nil: #{text}"
    end

    if ?/ == text[1]
      # It's an end tag
      @end_tag = true
      @tag_name = '/' + tag_name.downcase
    else
      @end_tag = false
      @tag_name = tag_name.downcase
    end

    @hashed = false
  end

  # Retrieve a hash of all the tag's attributes, keyed by lower-cased
  # attribute name. End tags always yield an empty hash.
  # Lazily done, so that if you don't look at a tag's attributes
  # things go quicker
  def attr_hash
    # Lazy initialize == don't build the hash until it's needed
    if !@hashed
      if !@end_tag
        # Get the attributes
        attr_arr = @raw.scan(/<[\w:-]+\s+(.*?)\/?>/m)[0]
        if attr_arr.kind_of?(Array)
          # Attributes found, parse them
          attrs = attr_arr[0]
          attr_arr = attrs.scan(/\s*([\w:-]+)(?:\s*=\s*("[^"]*"|'[^']*'|([^"'>][^\s>]*)))?/m)
          # clean up the array by:
          # * giving attributes without a value their own name as value
          # * removing enclosing quotes
          attr_arr.each {
            |item|
            val = if item[1].nil?
                    item[0]
                  elsif '"'[0] == item[1][0] or '\''[0] == item[1][0]
                    item[1][1 .. -2]
                  else
                    item[1]
                  end
            @attr_hash[item[0].downcase] = val
          }
        end
      end
      @hashed = true
    end

    @attr_hash
  end

  # Get the 'alt' text for an img or applet start tag, if it exists,
  # or an empty string otherwise
  def text
    if !end_tag and ['img', 'applet'].include?(tag_name)
      alt = attr_hash['alt']
      return alt if !alt.nil?
    end
    return ''
  end
end
305
+