ruby-mediawiki 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,259 @@
1
+ =begin
2
+ This file is part of Ruby-MediaWiki.
3
+
4
+ Ruby-MediaWiki is free software: you can redistribute it and/or
5
+ modify it under the terms of the GNU General Public License as
6
+ published by the Free Software Foundation, either version 3 of the
7
+ License, or (at your option) any later version.
8
+
9
+ Ruby-MediaWiki is distributed in the hope that it will be useful,
10
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
+ General Public License for more details.
13
+
14
+ You should have received a copy of the GNU General Public License
15
+ along with Ruby-MediaWiki. If not, see
16
+ <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+ begin
20
+ require 'htree'
21
+ rescue LoadError
22
+ MediaWiki::logger.warn("htree library missing. Cannot sanitize HTML.")
23
+ require 'rexml/document'
24
+ end
25
+
26
+
27
+ module MediaWiki
28
+ ##
29
+ # The Article class represents MediaWiki articles.
30
class Article
  ##
  # Raised by Article#parse when the fetched page contains neither the
  # edit form nor any textarea.
  class NoEditFormFound < RuntimeError
  end

  ##
  # Article name as passed to Article#initialize
  attr_accessor :name
  ##
  # Article text, will be set by Article#reload
  attr_accessor :text
  ##
  # true when Article#parse only found a bare textarea instead of the
  # edit form; Article#submit refuses to post in that case
  attr_accessor :read_only

  ##
  # Create a new Article instance
  # wiki:: [Wiki] instance to be used to retrieve the MiniBrowser
  # name:: [String] Article name
  # section:: [Fixnum] Optional article section number
  # load_text:: [Boolean] Invoke Article#reload to retrieve Article#text
  def initialize(wiki, name, section = nil, load_text=true)
    @wiki = wiki
    @name = name
    @section = section

    @text = nil
    @xhtml = nil
    @xhtml_cached = false
    @wp_edittoken = nil
    @wp_edittime = nil

    reload if load_text
  end

  ##
  # Return the full article name
  #
  # This will only return @name, but may be overridden by descendants
  # to include namespaces.
  # result:: [String] Full name
  def full_name
    @name
  end

  ##
  # Return the URL of the article as configured
  #
  # The URL is taken from the "ca-nstab-main" tab link of the rendered
  # page, so it is a nice human-readable URL if your MediaWiki is
  # configured that way, unlike the generic URL returned by
  # Wiki#full_article_url.
  # result:: [String] URL
  def url
    tab_url('//li[@id="ca-nstab-main"]//a')
  end

  ##
  # Return the URL of the talk page of the article
  #
  # The URL is taken from the "ca-talk" tab link of the rendered page,
  # so it is a nice human-readable URL if your MediaWiki is configured
  # that way. For empty talk pages this will return an ugly URL just
  # as MediaWiki does.
  # result:: [String] URL
  def talk_url
    tab_url('//li[@id="ca-talk"]//a')
  end

  ##
  # Get the XHTML,
  # will invoke Article#xhtml_reload if not already cached
  # result:: [REXML::Element] html root element
  def xhtml
    xhtml_reload unless @xhtml_cached
    @xhtml
  end

  ##
  # Reload the xhtml,
  # will be automatically done by Article#xhtml if not already cached.
  def xhtml_reload
    html = @wiki.browser.get_content(@wiki.article_url(full_name, @section).to_s)
    @xhtml = to_rexml(html)

    @xhtml_cached = true
  end

  ##
  # Reload Article#text,
  # should be done by Article#initialize.
  def reload
    edit_url = "#{@wiki.article_url(full_name, @section)}&action=edit"
    MediaWiki::logger.debug("Loading #{edit_url}")
    parse @wiki.browser.get_content(edit_url)
  end

  ##
  # Extract text, edit token and edit time from an edit page
  #
  # Sets Article#text and Article#read_only. If the page carries the
  # "editform" form, the wpEditToken and wpEdittime values are
  # remembered for the next Article#submit.
  # html:: [String] Document as returned by the MiniBrowser
  # Raises NoEditFormFound if the page has no textarea at all.
  def parse(html)
    doc = to_rexml(html)
    # does not work for MediaWiki 1.4.x and is always the same name you ask for under 1.5.x
    # @name = doc.elements['//span[@class="editHelp"]/a'].attributes['title']
    form = doc.elements['//form[@name="editform"]']
    if form
      # we got an editable article
      @text = form.elements['textarea[@name="wpTextbox1"]'].text
      begin
        form.each_element('input') do |input|
          case input.attributes['name']
          when 'wpEditToken' then @wp_edittoken = input.attributes['value']
          when 'wpEdittime' then @wp_edittime = input.attributes['value']
          end
        end
        @read_only = false
      rescue NoMethodError
        # wpEditToken might be missing, that's ok
      end
    elsif (textarea = doc.elements['//textarea'])
      # the article is probably locked and you do not have sufficient privileges
      @text = textarea.text
      @read_only = true
    else
      raise NoEditFormFound, "Error while parsing result, no edit form found"
    end
  end

  ##
  # Push the *Submit* button
  #
  # Send the modified Article#text to the MediaWiki.
  # summary:: [String] Change summary
  # minor_edit:: [Boolean] This is a Minor Edit
  # watch_this:: [Boolean] Watch this article
  # retries:: [Fixnum] How often to re-submit when MediaWiki answers with a fresh edit form
  # Raises RuntimeError if the article is read-only or the re-submit
  # limit is reached.
  def submit(summary, minor_edit=false, watch_this=false, retries=10)
    raise "This Article is read-only." if read_only

    submit_url = "#{@wiki.article_url(full_name, @section)}&action=submit"
    MediaWiki::logger.debug("Posting to #{submit_url} with wpEditToken=#{@wp_edittoken} wpEdittime=#{@wp_edittime}")
    data = {'wpTextbox1' => @text, 'wpSummary' => summary, 'wpSave' => 1, 'wpEditToken' => @wp_edittoken, 'wpEdittime' => @wp_edittime}
    data['wpMinoredit'] = 1 if minor_edit
    data['wpWatchthis'] = 'on' if watch_this

    begin
      parse @wiki.browser.post_content(submit_url, data)
    rescue NoEditFormFound
      # This means we haven't got the preview page, but the posted article.
      # So everything is ok, but we must reload the edit page here to get
      # a new wpEditToken and wpEdittime
      reload
      return
    rescue Net::HTTPInternalServerError
      # NOTE(review): this is an HTTP response class, not an exception
      # class, so this clause cannot match a raised error. Kept as-is.
    end

    # We got an edit form back. Nothing to compare if it carried no token and no time.
    return if @wp_edittoken.to_s == '' && @wp_edittime.to_s == ''
    # Token and time unchanged: nothing to re-submit.
    return unless data['wpEditToken'] != @wp_edittoken || data['wpEdittime'] != @wp_edittime

    raise "Re-submit limit reached" unless retries > 0
    submit(summary, minor_edit, watch_this, retries - 1)
  end

  ##
  # Delete this article
  # reason:: [String] Delete reason
  # result:: Whatever MiniBrowser#post_content returns
  def delete(reason)
    data = {'wpReason' => reason, 'wpEditToken' => @wp_edittoken, 'wpConfirmB' => 'Delete Page'}
    @wiki.browser.post_content("#{@wiki.article_url(full_name)}&action=delete", data)
  end

  ##
  # Protect this article
  # reason:: [String] Protect reason
  # moves_only:: [Boolean] Send wpMoveOnly along with the request
  # result:: Whatever MiniBrowser#post_content returns
  def protect(reason, moves_only=false)
    data = {'wpReasonProtect' => reason, 'wpEditToken' => @wp_edittoken, 'wpConfirmProtectB' => 'Protect Page'}
    data['wpMoveOnly'] = 1 if moves_only
    @wiki.browser.post_content("#{@wiki.article_url(full_name)}&action=protect", data)
  end

  ##
  # Unprotect this article
  # reason:: [String] Unprotect reason
  # result:: Whatever MiniBrowser#post_content returns
  def unprotect(reason)
    data = {'wpReasonProtect' => reason, 'wpEditToken' => @wp_edittoken, 'wpConfirmProtectB' => 'Protect Page'}
    @wiki.browser.post_content("#{@wiki.article_url(full_name)}&action=unprotect", data)
  end

  ##
  # "what links here" url for this article
  # count:: [Fixnum] Optional value for the limit parameter
  # result:: [String] URL, with "&limit=count" appended if count is given
  def what_links_here_url(count = nil)
    base = @wiki.article_url("Special:Whatlinkshere/#{full_name}")
    # Always return the URL; the limit parameter is only an optional suffix.
    count ? "#{base}&limit=#{count}" : base
  end

  ##
  # What articles link to this article?
  # count:: [Fixnum] Optional limit, see Article#what_links_here_url
  # result:: [Array] of [String] Article names
  def what_links_here(count = nil)
    links = to_rexml(@wiki.browser.get_content(what_links_here_url(count)))
    titles = []
    links.each_element('//div[@id="bodyContent"]//ul/li/a') { |a| titles << a.attributes['title'] }
    titles
  end

  ##
  # Like Article#what_links_here, but scans the raw document with a
  # regular expression instead of building a REXML tree.
  # count:: [Fixnum] Optional limit, see Article#what_links_here_url
  # result:: [Array] of [String] Article names
  def fast_what_links_here(count = nil)
    content = @wiki.browser.get_content(what_links_here_url(count))
    content.scan(%r{<li><a href=".+?" title="(.+?)">.+?</a>.+?</li>}).flatten.map { |title|
      REXML::Text.unnormalize(title)
    }
  end

  protected

  ##
  # Turn a document into a REXML tree, going through HTree when that
  # library has been loaded.
  # html:: [String] Document
  # result:: [REXML::Element] root element
  def to_rexml(html)
    # const_defined? works with Symbol names on every Ruby version, whereas
    # Class.constants holds Symbols on Ruby >= 1.9 and never matched 'HTree'.
    rexml = if Object.const_defined?(:HTree)
      HTree(html).to_rexml
    else
      REXML::Document.new(html)
    end
    rexml.root
  end

  ##
  # Build an absolute URL from the href of the first element matching
  # xpath in the rendered page, based on a copy of the wiki's URL.
  # xpath:: [String] XPath of the link element
  # result:: [String] URL
  def tab_url(xpath)
    uri = @wiki.url.dup
    uri.path, uri.query = xhtml.elements[xpath].attributes['href'].split(/\?/, 2)
    uri.to_s
  end
end
258
+
259
+ end
@@ -0,0 +1,54 @@
1
+ =begin
2
+ This file is part of Ruby-MediaWiki.
3
+
4
+ Ruby-MediaWiki is free software: you can redistribute it and/or
5
+ modify it under the terms of the GNU General Public License as
6
+ published by the Free Software Foundation, either version 3 of the
7
+ License, or (at your option) any later version.
8
+
9
+ Ruby-MediaWiki is distributed in the hope that it will be useful,
10
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
+ General Public License for more details.
13
+
14
+ You should have received a copy of the GNU General Public License
15
+ along with Ruby-MediaWiki. If not, see
16
+ <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+ require 'rexml/document'
20
+ require 'mediawiki/article'
21
+
22
+ module MediaWiki
23
+ ##
24
+ # The Category class represents MediaWiki categories.
25
class Category < Article
  ##
  # This returns the full article name prefixed with "Category:"
  # instead of the name, which should not carry a prefix.
  # result:: [String] Full name
  def full_name
    "Category:#{@name}"
  end

  ##
  # Calls the reload function of the super-class (Article#reload)
  # but removes the prefix (namespace) then.
  #
  # Everything up to and including the first colon of the name is
  # dropped. The name is replaced by a new string rather than edited
  # in place, so the string handed to the constructor is left
  # untouched and frozen names are accepted.
  #
  # Use full_name to obtain the name with namespace.
  def reload
    super
    @name = @name.sub(/^.+?:/, '')
  end

  ##
  # Which articles belong to this category?
  # result:: [Array] of [String] Article names
  def articles
    titles = []
    xhtml.each_element('//div[@id="bodyContent"]//ul/li/a') { |a| titles << a.attributes['title'] }
    titles
  end
end
54
+ end
@@ -0,0 +1,51 @@
1
+ =begin
2
+ This file is part of Ruby-MediaWiki.
3
+
4
+ Ruby-MediaWiki is free software: you can redistribute it and/or
5
+ modify it under the terms of the GNU General Public License as
6
+ published by the Free Software Foundation, either version 3 of the
7
+ License, or (at your option) any later version.
8
+
9
+ Ruby-MediaWiki is distributed in the hope that it will be useful,
10
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
+ General Public License for more details.
13
+
14
+ You should have received a copy of the GNU General Public License
15
+ along with Ruby-MediaWiki. If not, see
16
+ <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+ require 'yaml'
20
+ require 'mediawiki'
21
+
22
module MediaWiki
  ##
  # dotfile function reads the user's MediaWiki config and
  # creates a Wiki instance.
  #
  # The filename is determined by the environment variable
  # MEDIAWIKI_RC or defaults to ~/.mediawikirc .
  #
  # A configured wiki can be chosen by the option mywiki, with
  # the MEDIAWIKI_WIKI environment variable, or defaults to the
  # wiki named by the "default" entry of the file (in that order).
  #
  # A robot may set [myrealm] to retrieve a second result
  # output: a section with this name in the current wiki's
  # configuration for configuration of specific robot tasks.
  #
  # myrealm:: [String] Optional key of an extra section in the wiki's configuration
  # mywiki:: [String] Optional name of the configured wiki to use
  # result:: [Wiki], or [Array] of [Wiki] and the myrealm section if myrealm is given
  def MediaWiki.dotfile(myrealm=nil,mywiki=nil)
    filename = ENV['MEDIAWIKI_RC'] || "#{ENV['HOME']}/.mediawikirc"
    # Block form closes the file handle as soon as the YAML is read.
    dotfile = File.open(filename) { |file| YAML::load(file) }

    wikiconf = dotfile[mywiki] || dotfile[ENV['MEDIAWIKI_WIKI'] || dotfile['default']]
    wiki = Wiki.new(wikiconf['url'], wikiconf['user'], wikiconf['password'])

    myrealm ? [wiki, wikiconf[myrealm]] : wiki
  end
end
@@ -0,0 +1,140 @@
1
+ =begin
2
+ This file is part of Ruby-MediaWiki.
3
+
4
+ Ruby-MediaWiki is free software: you can redistribute it and/or
5
+ modify it under the terms of the GNU General Public License as
6
+ published by the Free Software Foundation, either version 3 of the
7
+ License, or (at your option) any later version.
8
+
9
+ Ruby-MediaWiki is distributed in the hope that it will be useful,
10
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
+ General Public License for more details.
13
+
14
+ You should have received a copy of the GNU General Public License
15
+ along with Ruby-MediaWiki. If not, see
16
+ <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+ require 'net/http'
20
+ require 'net/https'
21
+ require 'cgi'
22
+
23
+ module MediaWiki
24
+ ##
25
+ # The MiniBrowser is used to perform GET and POST requests
26
+ # over HTTP and HTTPS, supporting:
27
+ # * HTTP-Auth encoding in URLs (proto://user:password@host/...)
28
+ # * Cookie support
29
+ # * HTTP Redirection (max. 10 in a row)
30
+ #
31
+ # All interaction with MiniBrowser is normally done by
32
+ # MediaWiki::Wiki.
33
class MiniBrowser
  ##
  # Initialize a MiniBrowser instance
  #
  # No connection is opened here; every request opens its own.
  # url:: [URI::HTTP] or [URI::HTTPS]
  def initialize(url)
    @url = url
    @http = Net::HTTP.new(@url.host, @url.port)
    @http.use_ssl = true if @url.is_a?(URI::HTTPS)
    @user_agent = 'WikiBot'
    @cookies = {}
  end

  ##
  # Add cookies to the volatile cookie cache
  #
  # Only the leading "name=value" pair of each entry is kept, anything
  # after the first semicolon is dropped. Entries without "=" are
  # ignored. The given strings are not modified.
  # cookies:: [Array] of [String] Set-Cookie values; a single [String] is accepted, too
  def add_cookie(cookies)
    Array(cookies).each do |raw|
      pair = raw.gsub(/;.*$/, '')
      if (match = pair.match(/([^=]+)=(.*)/))
        @cookies[match[1]] = match[2]
      end
    end
  end

  ##
  # Get the cookie cache in a serialized form ready for HTTP.
  # result:: [String]
  def cookies
    @cookies.map { |key, value| "#{key}=#{value}" }.join(";")
  end

  ##
  # Perform a GET request
  #
  # This method accepts 10 HTTP redirects at max. A "404 Not Found"
  # response is treated like a success and its body is returned.
  # url:: [String]
  # result:: [String] Document
  # Raises RuntimeError on too many redirects or any other response.
  def get_content(url)
    retries = 10

    @http.start do |http|
      loop do
        raise "too many redirects" if retries < 1

        response = http.request(build_request(Net::HTTP::Get, url))

        case response
        when Net::HTTPSuccess, Net::HTTPNotFound
          return response.body
        when Net::HTTPRedirection
          MediaWiki::logger.debug("Redirecting to #{response['Location']}")
          retries -= 1
          url = response['Location']
        else
          raise "Unknown Response: #{response.inspect}"
        end
      end
    end
  end

  ##
  # Perform a POST request
  #
  # Cookies sent along with a success or redirect response are stored.
  # Will switch to MiniBrowser#get_content upon HTTP redirect.
  # url:: [String]
  # data:: [Hash] POST data
  # result:: [String] Document
  # Raises RuntimeError on any response other than success or redirect.
  def post_content(url, data)
    post_data = data.map { |key, value| "#{CGI::escape(key.to_s)}=#{CGI::escape(value.to_s)}" }.join('&')

    response = @http.start do |http|
      http.request(build_request(Net::HTTP::Post, url), post_data)
    end

    case response
    when Net::HTTPSuccess
      store_cookies(response)
      response.body
    when Net::HTTPRedirection
      MediaWiki::logger.debug("Redirecting to #{response['Location']}")
      store_cookies(response)
      get_content(response['Location'])
    else
      raise "Unknown Response on #{url}: #{response.inspect}"
    end
  end

  private

  ##
  # Build a request of the given class carrying the common headers
  # (content type, user agent, cookies) and HTTP-Auth credentials if
  # the browser's URL contains a user.
  def build_request(request_class, url)
    request = request_class.new(url, {'Content-Type' => 'application/x-www-form-urlencoded',
                                      'User-Agent' => @user_agent,
                                      'Cookie' => cookies})
    request.basic_auth(@url.user, @url.password) if @url.user
    request
  end

  ##
  # Store the Set-Cookie header(s) of a response in the cookie cache.
  def store_cookies(response)
    return unless response['Set-Cookie']

    # get_fields yields one entry per header line; responses that do not
    # offer it only provide the combined header value.
    fields = response.respond_to?(:get_fields) ? response.get_fields('Set-Cookie') : response['Set-Cookie']
    add_cookie(fields)
  end
end
140
+ end