entp-ruby-openid 2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data/CHANGELOG +215 -0
  2. data/INSTALL +47 -0
  3. data/LICENSE +210 -0
  4. data/NOTICE +2 -0
  5. data/README +85 -0
  6. data/UPGRADE +127 -0
  7. data/admin/runtests.rb +45 -0
  8. data/examples/README +32 -0
  9. data/examples/active_record_openid_store/README +58 -0
  10. data/examples/active_record_openid_store/XXX_add_open_id_store_to_db.rb +24 -0
  11. data/examples/active_record_openid_store/XXX_upgrade_open_id_store.rb +26 -0
  12. data/examples/active_record_openid_store/init.rb +8 -0
  13. data/examples/active_record_openid_store/lib/association.rb +10 -0
  14. data/examples/active_record_openid_store/lib/nonce.rb +3 -0
  15. data/examples/active_record_openid_store/lib/open_id_setting.rb +4 -0
  16. data/examples/active_record_openid_store/lib/openid_ar_store.rb +57 -0
  17. data/examples/active_record_openid_store/test/store_test.rb +212 -0
  18. data/examples/discover +49 -0
  19. data/examples/rails_openid/README +153 -0
  20. data/examples/rails_openid/Rakefile +10 -0
  21. data/examples/rails_openid/app/controllers/application.rb +4 -0
  22. data/examples/rails_openid/app/controllers/consumer_controller.rb +125 -0
  23. data/examples/rails_openid/app/controllers/login_controller.rb +45 -0
  24. data/examples/rails_openid/app/controllers/server_controller.rb +265 -0
  25. data/examples/rails_openid/app/helpers/application_helper.rb +3 -0
  26. data/examples/rails_openid/app/helpers/login_helper.rb +2 -0
  27. data/examples/rails_openid/app/helpers/server_helper.rb +9 -0
  28. data/examples/rails_openid/app/views/consumer/index.rhtml +81 -0
  29. data/examples/rails_openid/app/views/layouts/server.rhtml +68 -0
  30. data/examples/rails_openid/app/views/login/index.rhtml +56 -0
  31. data/examples/rails_openid/app/views/server/decide.rhtml +26 -0
  32. data/examples/rails_openid/config/boot.rb +19 -0
  33. data/examples/rails_openid/config/database.yml +74 -0
  34. data/examples/rails_openid/config/environment.rb +54 -0
  35. data/examples/rails_openid/config/environments/development.rb +19 -0
  36. data/examples/rails_openid/config/environments/production.rb +19 -0
  37. data/examples/rails_openid/config/environments/test.rb +19 -0
  38. data/examples/rails_openid/config/routes.rb +24 -0
  39. data/examples/rails_openid/doc/README_FOR_APP +2 -0
  40. data/examples/rails_openid/public/404.html +8 -0
  41. data/examples/rails_openid/public/500.html +8 -0
  42. data/examples/rails_openid/public/dispatch.cgi +12 -0
  43. data/examples/rails_openid/public/dispatch.fcgi +26 -0
  44. data/examples/rails_openid/public/dispatch.rb +12 -0
  45. data/examples/rails_openid/public/favicon.ico +0 -0
  46. data/examples/rails_openid/public/images/openid_login_bg.gif +0 -0
  47. data/examples/rails_openid/public/javascripts/controls.js +750 -0
  48. data/examples/rails_openid/public/javascripts/dragdrop.js +584 -0
  49. data/examples/rails_openid/public/javascripts/effects.js +854 -0
  50. data/examples/rails_openid/public/javascripts/prototype.js +1785 -0
  51. data/examples/rails_openid/public/robots.txt +1 -0
  52. data/examples/rails_openid/script/about +3 -0
  53. data/examples/rails_openid/script/breakpointer +3 -0
  54. data/examples/rails_openid/script/console +3 -0
  55. data/examples/rails_openid/script/destroy +3 -0
  56. data/examples/rails_openid/script/generate +3 -0
  57. data/examples/rails_openid/script/performance/benchmarker +3 -0
  58. data/examples/rails_openid/script/performance/profiler +3 -0
  59. data/examples/rails_openid/script/plugin +3 -0
  60. data/examples/rails_openid/script/process/reaper +3 -0
  61. data/examples/rails_openid/script/process/spawner +3 -0
  62. data/examples/rails_openid/script/process/spinner +3 -0
  63. data/examples/rails_openid/script/runner +3 -0
  64. data/examples/rails_openid/script/server +3 -0
  65. data/examples/rails_openid/test/functional/login_controller_test.rb +18 -0
  66. data/examples/rails_openid/test/functional/server_controller_test.rb +18 -0
  67. data/examples/rails_openid/test/test_helper.rb +28 -0
  68. data/lib/hmac/hmac.rb +112 -0
  69. data/lib/hmac/sha1.rb +11 -0
  70. data/lib/hmac/sha2.rb +25 -0
  71. data/lib/openid.rb +22 -0
  72. data/lib/openid/association.rb +249 -0
  73. data/lib/openid/consumer.rb +395 -0
  74. data/lib/openid/consumer/associationmanager.rb +344 -0
  75. data/lib/openid/consumer/checkid_request.rb +186 -0
  76. data/lib/openid/consumer/discovery.rb +497 -0
  77. data/lib/openid/consumer/discovery_manager.rb +123 -0
  78. data/lib/openid/consumer/html_parse.rb +134 -0
  79. data/lib/openid/consumer/idres.rb +523 -0
  80. data/lib/openid/consumer/responses.rb +150 -0
  81. data/lib/openid/cryptutil.rb +115 -0
  82. data/lib/openid/dh.rb +89 -0
  83. data/lib/openid/extension.rb +39 -0
  84. data/lib/openid/extensions/ax.rb +539 -0
  85. data/lib/openid/extensions/oauth.rb +91 -0
  86. data/lib/openid/extensions/pape.rb +179 -0
  87. data/lib/openid/extensions/sreg.rb +277 -0
  88. data/lib/openid/extras.rb +11 -0
  89. data/lib/openid/fetchers.rb +258 -0
  90. data/lib/openid/kvform.rb +136 -0
  91. data/lib/openid/kvpost.rb +58 -0
  92. data/lib/openid/message.rb +553 -0
  93. data/lib/openid/protocolerror.rb +12 -0
  94. data/lib/openid/server.rb +1544 -0
  95. data/lib/openid/store.rb +10 -0
  96. data/lib/openid/store/filesystem.rb +272 -0
  97. data/lib/openid/store/interface.rb +75 -0
  98. data/lib/openid/store/memcache.rb +109 -0
  99. data/lib/openid/store/memory.rb +84 -0
  100. data/lib/openid/store/nonce.rb +68 -0
  101. data/lib/openid/trustroot.rb +349 -0
  102. data/lib/openid/urinorm.rb +75 -0
  103. data/lib/openid/util.rb +119 -0
  104. data/lib/openid/version.rb +3 -0
  105. data/lib/openid/yadis.rb +15 -0
  106. data/lib/openid/yadis/accept.rb +148 -0
  107. data/lib/openid/yadis/constants.rb +21 -0
  108. data/lib/openid/yadis/discovery.rb +153 -0
  109. data/lib/openid/yadis/filters.rb +205 -0
  110. data/lib/openid/yadis/htmltokenizer.rb +305 -0
  111. data/lib/openid/yadis/parsehtml.rb +45 -0
  112. data/lib/openid/yadis/services.rb +42 -0
  113. data/lib/openid/yadis/xrds.rb +155 -0
  114. data/lib/openid/yadis/xri.rb +90 -0
  115. data/lib/openid/yadis/xrires.rb +91 -0
  116. data/test/data/test_discover/openid_utf8.html +11 -0
  117. data/test/support/test_data_mixin.rb +127 -0
  118. data/test/support/test_util.rb +53 -0
  119. data/test/support/yadis_data.rb +131 -0
  120. data/test/support/yadis_data/accept.txt +124 -0
  121. data/test/support/yadis_data/dh.txt +29 -0
  122. data/test/support/yadis_data/example-xrds.xml +14 -0
  123. data/test/support/yadis_data/linkparse.txt +587 -0
  124. data/test/support/yadis_data/n2b64 +650 -0
  125. data/test/support/yadis_data/test1-discover.txt +137 -0
  126. data/test/support/yadis_data/test1-parsehtml.txt +152 -0
  127. data/test/support/yadis_data/test_discover/malformed_meta_tag.html +19 -0
  128. data/test/support/yadis_data/test_discover/openid.html +11 -0
  129. data/test/support/yadis_data/test_discover/openid2.html +11 -0
  130. data/test/support/yadis_data/test_discover/openid2_xrds.xml +12 -0
  131. data/test/support/yadis_data/test_discover/openid2_xrds_no_local_id.xml +11 -0
  132. data/test/support/yadis_data/test_discover/openid_1_and_2.html +11 -0
  133. data/test/support/yadis_data/test_discover/openid_1_and_2_xrds.xml +16 -0
  134. data/test/support/yadis_data/test_discover/openid_1_and_2_xrds_bad_delegate.xml +17 -0
  135. data/test/support/yadis_data/test_discover/openid_and_yadis.html +12 -0
  136. data/test/support/yadis_data/test_discover/openid_no_delegate.html +10 -0
  137. data/test/support/yadis_data/test_discover/openid_utf8.html +11 -0
  138. data/test/support/yadis_data/test_discover/yadis_0entries.xml +12 -0
  139. data/test/support/yadis_data/test_discover/yadis_2_bad_local_id.xml +15 -0
  140. data/test/support/yadis_data/test_discover/yadis_2entries_delegate.xml +22 -0
  141. data/test/support/yadis_data/test_discover/yadis_2entries_idp.xml +21 -0
  142. data/test/support/yadis_data/test_discover/yadis_another_delegate.xml +14 -0
  143. data/test/support/yadis_data/test_discover/yadis_idp.xml +12 -0
  144. data/test/support/yadis_data/test_discover/yadis_idp_delegate.xml +13 -0
  145. data/test/support/yadis_data/test_discover/yadis_no_delegate.xml +11 -0
  146. data/test/support/yadis_data/test_xrds/=j3h.2007.11.14.xrds +25 -0
  147. data/test/support/yadis_data/test_xrds/README +12 -0
  148. data/test/support/yadis_data/test_xrds/delegated-20060809-r1.xrds +34 -0
  149. data/test/support/yadis_data/test_xrds/delegated-20060809-r2.xrds +34 -0
  150. data/test/support/yadis_data/test_xrds/delegated-20060809.xrds +34 -0
  151. data/test/support/yadis_data/test_xrds/no-xrd.xml +7 -0
  152. data/test/support/yadis_data/test_xrds/not-xrds.xml +2 -0
  153. data/test/support/yadis_data/test_xrds/prefixsometimes.xrds +34 -0
  154. data/test/support/yadis_data/test_xrds/ref.xrds +109 -0
  155. data/test/support/yadis_data/test_xrds/sometimesprefix.xrds +34 -0
  156. data/test/support/yadis_data/test_xrds/spoof1.xrds +25 -0
  157. data/test/support/yadis_data/test_xrds/spoof2.xrds +25 -0
  158. data/test/support/yadis_data/test_xrds/spoof3.xrds +37 -0
  159. data/test/support/yadis_data/test_xrds/status222.xrds +9 -0
  160. data/test/support/yadis_data/test_xrds/subsegments.xrds +58 -0
  161. data/test/support/yadis_data/test_xrds/valid-populated-xrds.xml +39 -0
  162. data/test/support/yadis_data/trustroot.txt +153 -0
  163. data/test/support/yadis_data/urinorm.txt +79 -0
  164. data/test/test_accept.rb +170 -0
  165. data/test/test_association.rb +268 -0
  166. data/test/test_associationmanager.rb +918 -0
  167. data/test/test_ax.rb +690 -0
  168. data/test/test_checkid_request.rb +293 -0
  169. data/test/test_consumer.rb +260 -0
  170. data/test/test_cryptutil.rb +119 -0
  171. data/test/test_dh.rb +85 -0
  172. data/test/test_discover.rb +848 -0
  173. data/test/test_discovery_manager.rb +259 -0
  174. data/test/test_extension.rb +46 -0
  175. data/test/test_extras.rb +35 -0
  176. data/test/test_fetchers.rb +554 -0
  177. data/test/test_filters.rb +269 -0
  178. data/test/test_helper.rb +4 -0
  179. data/test/test_idres.rb +961 -0
  180. data/test/test_kvform.rb +164 -0
  181. data/test/test_kvpost.rb +64 -0
  182. data/test/test_linkparse.rb +100 -0
  183. data/test/test_message.rb +1115 -0
  184. data/test/test_nonce.rb +89 -0
  185. data/test/test_oauth.rb +176 -0
  186. data/test/test_openid_yadis.rb +177 -0
  187. data/test/test_pape.rb +248 -0
  188. data/test/test_parsehtml.rb +79 -0
  189. data/test/test_responses.rb +63 -0
  190. data/test/test_server.rb +2455 -0
  191. data/test/test_sreg.rb +479 -0
  192. data/test/test_stores.rb +292 -0
  193. data/test/test_trustroot.rb +111 -0
  194. data/test/test_urinorm.rb +34 -0
  195. data/test/test_util.rb +145 -0
  196. data/test/test_xrds.rb +167 -0
  197. data/test/test_xri.rb +48 -0
  198. data/test/test_xrires.rb +67 -0
  199. data/test/test_yadis_discovery.rb +218 -0
  200. metadata +268 -0
@@ -0,0 +1,205 @@
1
+ # This file contains functions and classes used for extracting
2
+ # endpoint information out of a Yadis XRD file using the REXML
3
+ # XML parser.
4
+
5
+ #
6
+ module OpenID
7
+ module Yadis
8
class BasicServiceEndpoint
  # Generic endpoint object holding parsed service information
  # together with a reference to the service element it was generated
  # from. If the xrd:Service has more than one xrd:Type or xrd:URI,
  # an instance represents just one of those pairs.
  #
  # Both the class and its instances respond to
  # from_basic_service_endpoint, which is what allows this object to
  # be used as a filter. The simplest kind of filter you can write
  # implements from_basic_service_endpoint, which takes one of these
  # objects.
  attr_reader :type_uris, :yadis_url, :uri, :service_element

  # Store the four values as given; nothing is copied or validated.
  def initialize(yadis_url, type_uris, uri, service_element)
    @yadis_url = yadis_url
    @type_uris = type_uris
    @uri = uri
    @service_element = service_element
  end

  # Return the type URIs of this endpoint that also appear in the
  # given list (an intersection, so the result is empty when none
  # match). Useful for endpoint classes that need to check for
  # several versions of a single protocol.
  def match_types(type_uris)
    @type_uris & type_uris
  end

  # Trivial transform from a basic endpoint to itself; exists so that
  # BasicServiceEndpoint can be used as a filter.
  #
  # Subclasses are expected to re-implement this.
  def self.from_basic_service_endpoint(endpoint)
    endpoint
  end

  # Instance-level counterpart that forwards to the class method of
  # the receiver's own class, so that both the class and its
  # instances answer this message.
  def from_basic_service_endpoint(endpoint)
    self.class.from_basic_service_endpoint(endpoint)
  end
end
53
+
54
# Takes a list of basic filters (endpoint transformers) and makes a
# top-level filter out of them. This is mostly useful for the
# implementation of make_filter, and should only be needed for
# special cases or internal use by this library.
#
# It suits simple filters for services that use one URI and are
# specified by one Type (most Types are expected to fit this
# paradigm).
#
# For every expanded service entry it creates a BasicServiceEndpoint
# and applies the transformers to it until one of them returns a
# value.
class TransformFilterMaker
  attr_reader :filter_procs

  # filter_procs are the endpoint transformers to apply to each basic
  # endpoint; each must respond to call. They are tried in order
  # until one does not return nil, and that result is used.
  def initialize(filter_procs)
    @filter_procs = filter_procs
  end

  # Returns an array of the endpoint objects produced by the filter
  # procs, one per expanded (Type, URI) entry of the service element
  # for which some transformer returned a non-nil value.
  def get_service_endpoints(yadis_url, service_element)
    collected = []

    # expand_service yields one entry per xrd:Type / xrd:URI
    # combination; the third member of each entry is not needed here.
    Yadis.expand_service(service_element).each do |type_uris, uri, _|
      basic = BasicServiceEndpoint.new(yadis_url, type_uris, uri, service_element)
      transformed = apply_filters(basic)
      collected << transformed unless transformed.nil?
    end

    collected
  end

  # Run the transformers against one endpoint and return the first
  # non-nil result; later transformers are not called. Returns nil
  # when every transformer returned nil.
  def apply_filters(endpoint)
    @filter_procs.each do |transformer|
      outcome = transformer.call(endpoint)
      return outcome unless outcome.nil?
    end

    nil
  end
end
115
+
116
# A filter made of other filters: it asks each subfilter for its
# endpoints and hands back all of them together.
class CompoundFilter
  attr_reader :subfilters

  # subfilters is the collection of filters whose results are
  # collected; each must respond to get_service_endpoints.
  def initialize(subfilters)
    @subfilters = subfilters
  end

  # Generate the endpoint objects of every subfilter, in subfilter
  # order, and return them concatenated into a single array.
  def get_service_endpoints(yadis_url, service_element)
    combined = []
    @subfilters.each do |member|
      combined.concat(member.get_service_endpoints(yadis_url, service_element))
    end
    combined
  end
end
135
+
136
# Shared TypeError instance, raised when something cannot be turned
# into a filter.
@@filter_type_error = TypeError.new('Expected a filter, an endpoint, a callable or a list of any of these.')
140
+
141
# Convert a filter-convertible thing into a filter.
#
# parts should be a filter, an endpoint, a callable, or a list of any
# of these. nil is treated as [BasicServiceEndpoint], and a single
# non-Array value is wrapped in a one-element list; the resulting
# list is passed to mk_compound_filter, whose result is returned.
def self.make_filter(parts)
  parts = [BasicServiceEndpoint] if parts.nil?
  candidates = parts.is_a?(Array) ? parts : [parts]
  mk_compound_filter(candidates)
end
157
+
158
# Create a filter out of a list of filter-like things.
#
# Used by make_filter.
#
# parts must respond to each; every item is handled as follows:
# * an Array is converted recursively into a nested filter;
# * an object responding to get_service_endpoints is a full filter
#   and is kept as-is;
# * an object responding to from_basic_service_endpoint is an
#   endpoint, and that method becomes an endpoint transformer;
# * an object responding to call is used as an endpoint transformer
#   directly.
#
# All transformers are bundled into one TransformFilterMaker, which
# is placed after the full filters. When exactly one filter results
# it is returned itself; otherwise the filters are wrapped in a
# CompoundFilter.
#
# Raises TypeError if parts is not iterable or if an item is none of
# the things listed above.
def self.mk_compound_filter(parts)
  unless parts.respond_to?(:each)
    raise TypeError, "#{parts.inspect} is not iterable"
  end

  # Separate into a list of callables and a list of filter objects
  transformers = []
  filters = []
  parts.each do |subfilter|
    if subfilter.is_a?(Array)
      filters << mk_compound_filter(subfilter)
    elsif subfilter.respond_to?(:get_service_endpoints)
      # It's a full filter
      filters << subfilter
    elsif subfilter.respond_to?(:from_basic_service_endpoint)
      # It's an endpoint object, so use its endpoint conversion
      # method as an endpoint transformer
      transformers << subfilter.method(:from_basic_service_endpoint)
    elsif subfilter.respond_to?(:call)
      # It's a proc, so it is an endpoint transformer already
      transformers << subfilter
    else
      # Build a new exception for every failure instead of re-raising
      # one shared, pre-built instance: an exception object that has
      # already been raised keeps the backtrace of that first raise.
      # The message is the same as that of @@filter_type_error.
      raise TypeError,
            'Expected a filter, an endpoint, a callable or a list of any of these.'
    end
  end

  filters << TransformFilterMaker.new(transformers) unless transformers.empty?

  filters.length == 1 ? filters[0] : CompoundFilter.new(filters)
end
204
+ end
205
+ end
@@ -0,0 +1,305 @@
1
+ # = HTMLTokenizer
2
+ #
3
+ # Author:: Ben Giddings (mailto:bg-rubyforge@infofiend.com)
4
+ # Copyright:: Copyright (c) 2004 Ben Giddings
5
+ # License:: Distributes under the same terms as Ruby
6
+ #
7
+ #
8
+ # This is a partial port of the functionality behind Perl's TokeParser
9
+ # Provided a page it progressively returns tokens from that page
10
+ #
11
+ # $Id: htmltokenizer.rb,v 1.7 2005/06/07 21:05:53 merc Exp $
12
+
13
+ #
14
+ # A class to tokenize HTML.
15
+ #
16
+ # Example:
17
+ #
18
+ # page = "<HTML>
19
+ # <HEAD>
20
+ # <TITLE>This is the title</TITLE>
21
+ # </HEAD>
22
+ # <!-- Here comes the <a href=\"missing.link\">blah</a>
23
+ # comment body
24
+ # -->
25
+ # <BODY>
26
+ # <H1>This is the header</H1>
27
+ # <P>
28
+ # This is the paragraph, it contains
29
+ # <a href=\"link.html\">links</a>,
30
+ # <img src=\"blah.gif\" optional alt='images
31
+ # are
32
+ # really cool'>. Ok, here is some more text and
33
+ # <A href=\"http://another.link.com/\" target=\"_blank\">another link</A>.
34
+ # </P>
35
+ # </body>
36
+ # </HTML>
37
+ # "
38
+ # toke = HTMLTokenizer.new(page)
39
+ #
40
+ # assert("<h1>" == toke.getTag("h1", "h2", "h3").to_s.downcase)
41
+ # assert(HTMLTag.new("<a href=\"link.html\">") == toke.getTag("IMG", "A"))
42
+ # assert("links" == toke.getTrimmedText)
43
+ # assert(toke.getTag("IMG", "A").attr_hash['optional'])
44
+ # assert("_blank" == toke.getTag("IMG", "A").attr_hash['target'])
45
+ #
46
class HTMLTokenizer
  @@version = 1.0

  # Get version of HTMLTokenizer lib
  def self.version
    @@version
  end

  # The complete string being tokenized.
  attr_reader :page

  # Create a new tokenizer, based on the content, used as a string
  # (content is converted with to_s). Tokenizing starts at the
  # beginning of the page.
  def initialize(content)
    @page = content.to_s
    @cur_pos = 0
  end

  # Reset the parser, setting the current position back at the start
  def reset
    @cur_pos = 0
  end

  # Look at the next token, but don't actually grab it.
  #
  # Returns an HTMLComment, HTMLTag or HTMLText, or nil when the
  # whole page has been consumed. Raises HTMLTokenizerError when a
  # comment or tag that starts at the current position is never
  # closed.
  def peekNextToken
    return nil if @cur_pos == @page.length

    if ?< == @page[@cur_pos]
      # Next token is a tag of some kind
      if '!--' == @page[(@cur_pos + 1), 3]
        # Token is a comment
        tag_end = @page.index('-->', (@cur_pos + 1))
        if tag_end.nil?
          raise HTMLTokenizerError, "No end found to started comment:\n#{@page[@cur_pos,80]}"
        end
        # tag_end points at the first '-' of '-->', hence the + 2
        HTMLComment.new(@page[@cur_pos .. (tag_end + 2)])
      else
        # Token is a html tag
        tag_end = @page.index('>', (@cur_pos + 1))
        if tag_end.nil?
          raise HTMLTokenizerError, "No end found to started tag:\n#{@page[@cur_pos,80]}"
        end
        HTMLTag.new(@page[@cur_pos .. tag_end])
      end
    else
      # Next token is text: everything up to (not including) the next
      # '<', or up to the end of the page when there is none
      text_end = @page.index('<', @cur_pos)
      text_end = text_end.nil? ? -1 : (text_end - 1)
      HTMLText.new(@page[@cur_pos .. text_end])
    end
  end

  # Get the next token and advance past it. Returns an instance of
  # * HTMLText
  # * HTMLComment
  # * HTMLTag
  # or nil when there are no tokens left.
  def getNextToken
    token = peekNextToken
    @cur_pos += token.raw.length if token
    token
  end

  # Get a tag from the specified set of desired tags.
  # For example:
  # <tt>foo = toke.getTag("h1", "h2", "h3")</tt>
  # Will return the next header tag encountered.
  #
  # Names are matched case-insensitively; with no names given the
  # next tag of any kind is returned. Every token before the returned
  # tag is consumed. Returns nil if the page ends first.
  def getTag(*sought_tags)
    sought_tags = sought_tags.collect { |elm| elm.downcase }

    while (tag = getNextToken)
      if tag.kind_of?(HTMLTag) &&
         (sought_tags.empty? || sought_tags.include?(tag.tag_name))
        break
      end
    end
    tag
  end

  # Get all the text between the current position and the next tag
  # (when until_tag is nil) or a specific later tag.
  #
  # Without until_tag: returns the next text token's text and
  # consumes it, or "" when the next token is a tag or the page is
  # exhausted.
  #
  # With until_tag: consumes tokens up to, but not including, the
  # first tag with that name and returns the text of each token
  # followed by a single space. The name is matched
  # case-insensitively, as in getTag; use a leading '/' for an end
  # tag. If no such tag exists everything to the end of the page is
  # consumed.
  def getText(until_tag = nil)
    if until_tag.nil?
      # Next token is a tag, not text
      return "" if ?< == @page[@cur_pos]

      # Next token is text -- or there is no next token at all, in
      # which case there is simply no text to return
      token = getNextToken
      token.nil? ? "" : token.text
    else
      # HTMLTag#tag_name is always lower case, so normalise the
      # requested name the same way getTag does
      wanted = until_tag.to_s.downcase
      ret_str = ""

      while (tag = peekNextToken)
        break if tag.kind_of?(HTMLTag) && tag.tag_name == wanted

        ret_str << (tag.text + " ") if "" != tag.text
        getNextToken
      end

      ret_str
    end
  end

  # Like getText, but squeeze all whitespace, getting rid of
  # leading and trailing whitespace, and squeezing multiple
  # spaces into a single space.
  def getTrimmedText(until_tag = nil)
    getText(until_tag).strip.gsub(/\s+/m, " ")
  end
end
168
+
169
# Raised by HTMLTokenizer and the token classes when the input cannot
# be tokenized, e.g. an unterminated tag or comment.
#
# Derives from StandardError rather than Exception so that a plain
# +rescue+ (which only handles StandardError and its descendants)
# catches it; rescuing HTMLTokenizerError by name works as before.
class HTMLTokenizerError < StandardError
end
171
+
172
# The parent class for all three types of HTML tokens
class HTMLToken
  # The exact source text the token was built from.
  attr_accessor :raw

  # Initialize the token based on the raw text
  def initialize(text)
    @raw = text
  end

  # String form of a token: by default exactly the raw source.
  def to_s
    raw
  end

  # Text representation of the token; the base class has none, so
  # this is the empty string.
  def text
    ""
  end

  # The text representation with leading and trailing whitespace
  # removed and every run of whitespace collapsed to one space.
  def trimmed_text
    stripped = text.strip
    stripped.gsub(/\s+/m, " ")
  end

  # Tokens compare by raw source against the other object's to_s, so
  # a token can be compared with another token or with a plain string.
  def ==(other)
    raw == other.to_s
  end
end
200
+
201
# Class representing text that isn't inside a tag
class HTMLText < HTMLToken
  # For plain text the text representation is the raw source itself.
  def text
    self.raw
  end
end
207
+
208
# Class representing an HTML comment
class HTMLComment < HTMLToken
  # The comment body, without the <!-- and --> delimiters and without
  # the whitespace directly inside them.
  attr_accessor :contents

  # text must be one complete comment, "<!-- ... -->", with nothing
  # before or after it; otherwise HTMLTokenizerError is raised.
  def initialize(text)
    super(text)

    # \A and \z anchor the pattern to the whole string. Line anchors
    # (^ and $) would also accept a comment that merely occupies one
    # line of a larger, non-comment string.
    found = /\A<!--\s*(.*?)\s*-->\z/m.match(text)
    if found.nil?
      raise HTMLTokenizerError, "Text passed to HTMLComment.initialize is not a comment"
    end

    @contents = found[1]
  end
end
221
+
222
# Class representing an HTML tag
class HTMLTag < HTMLToken
  # end_tag:: true for a closing tag such as </p>, false otherwise
  # tag_name:: the lower-cased tag name; closing tags carry a leading
  #            '/' (e.g. '/p')
  attr_reader :end_tag, :tag_name

  # text must be one complete tag: it has to start with '<' and end
  # with '>'. Raises HTMLTokenizerError otherwise, or when no tag name
  # can be found in it.
  def initialize(text)
    super(text)
    if ?< != text[0] || ?> != text[-1]
      raise HTMLTokenizerError, "Text passed to HTMLTag.initialize is not a tag"
    end

    @attr_hash = Hash.new

    # The first run of name characters is the tag name
    tag_name = text.scan(/[\w:-]+/)[0]
    if tag_name.nil?
      raise HTMLTokenizerError, "No tag name found in: #{text}"
    end

    if ?/ == text[1]
      # It's an end tag
      @end_tag = true
      @tag_name = '/' + tag_name.downcase
    else
      @end_tag = false
      @tag_name = tag_name.downcase
    end

    # Attributes are parsed on first use, see attr_hash
    @hashed = false
  end

  # Retrieve a hash of all the tag's attributes.
  # Lazily done, so that if you don't look at a tag's attributes
  # things go quicker.
  #
  # Keys are the lower-cased attribute names. Quoted values have
  # their enclosing quotes removed; an attribute given without a
  # value maps to its own name as written. End tags always yield an
  # empty hash.
  def attr_hash
    # Lazy initialize == don't build the hash until it's needed
    unless @hashed
      parse_attributes unless @end_tag
      @hashed = true
    end

    @attr_hash
  end

  # Get the 'alt' text for a tag, if it exists, or an empty string
  # otherwise. Only img and applet start tags are consulted.
  def text
    if !end_tag && ['img', 'applet'].include?(tag_name)
      alt = attr_hash['alt']
      return alt unless alt.nil?
    end
    ''
  end

  private

  # Fill @attr_hash from the attribute section of the raw tag text.
  def parse_attributes
    # Everything between the tag name and the closing '>' (or '/>')
    attr_arr = @raw.scan(/<[\w:-]+\s+(.*?)\/?>/m)[0]
    return unless attr_arr.kind_of?(Array)

    # Attributes found, parse them. Each scan result is
    # [name, value-as-written-or-nil, unquoted-value-or-nil].
    attrs = attr_arr[0]
    attr_arr = attrs.scan(/\s*([\w:-]+)(?:\s*=\s*("[^"]*"|'[^']*'|([^"'>][^\s>]*)))?/m)
    attr_arr.each do |item|
      val = if item[1].nil?
              # No value given: use the attribute name itself
              item[0]
            elsif '"'[0] == item[1][0] || '\''[0] == item[1][0]
              # Quoted value: drop the enclosing quotes
              item[1][1 .. -2]
            else
              item[1]
            end
      @attr_hash[item[0].downcase] = val
    end
  end
end
305
+