ruby-mediawiki 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,259 @@
1
+ =begin
2
+ This file is part of Ruby-MediaWiki.
3
+
4
+ Ruby-MediaWiki is free software: you can redistribute it and/or
5
+ modify it under the terms of the GNU General Public License as
6
+ published by the Free Software Foundation, either version 3 of the
7
+ License, or (at your option) any later version.
8
+
9
+ Ruby-MediaWiki is distributed in the hope that it will be useful,
10
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
+ General Public License for more details.
13
+
14
+ You should have received a copy of the GNU General Public License
15
+ along with Ruby-MediaWiki. If not, see
16
+ <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+ begin
20
+ require 'htree'
21
+ rescue LoadError
22
+ MediaWiki::logger.warn("htree library missing. Cannot sanitize HTML.")
23
+ require 'rexml/document'
24
+ end
25
+
26
+
27
+ module MediaWiki
28
+ ##
29
+ # The Article class represents MediaWiki articles.
30
class Article
  ##
  # Raised by Article#parse when the fetched page contains neither the
  # edit form nor any textarea to read the article text from.
  class NoEditFormFound < RuntimeError
  end

  ##
  # Article name as given to Article#initialize
  attr_accessor :name
  ##
  # Article text, will be set by Article#reload
  attr_accessor :text
  ##
  # true if the last parsed edit page offered no edit form
  # (only a bare textarea), false if it was editable
  attr_accessor :read_only

  ##
  # Create a new Article instance
  # wiki:: [Wiki] instance to be used to retrieve the MiniBrowser
  # name:: [String] Article name
  # section:: [Fixnum] Optional article section number
  # load_text:: [Boolean] Invoke Article#reload to retrieve Article#text
  def initialize(wiki, name, section = nil, load_text = true)
    @wiki = wiki
    @name = name
    @section = section

    @text = nil
    @read_only = nil
    @xhtml = nil
    @xhtml_cached = false
    @wp_edittoken = nil
    @wp_edittime = nil

    reload if load_text
  end

  ##
  # Return the full article name
  #
  # This will only return @name, but may be overridden by descendants
  # to include namespaces.
  # result:: [String] Full name
  def full_name
    @name
  end

  ##
  # Return the URL of the article as configured
  #
  # The URL is taken from the link inside the "ca-nstab-main" tab of
  # the rendered article page, so it is whatever form of URL the
  # MediaWiki installation itself emits there.
  # result:: [String] URL
  def url
    tab_link_url('ca-nstab-main')
  end

  ##
  # Return the URL of the talk page of the article
  #
  # The URL is taken from the link inside the "ca-talk" tab of the
  # rendered article page, exactly as MediaWiki emits it.
  # result:: [String] URL
  def talk_url
    tab_link_url('ca-talk')
  end

  ##
  # Get the XHTML,
  # will invoke Article#xhtml_reload if not already cached
  # result:: [REXML::Element] html root element
  def xhtml
    xhtml_reload unless @xhtml_cached
    @xhtml
  end

  ##
  # Reload the xhtml,
  # will be automatically done by Article#xhtml if not already cached.
  def xhtml_reload
    @xhtml = to_rexml(@wiki.browser.get_content(@wiki.article_url(full_name, @section)))
    @xhtml_cached = true
  end

  ##
  # Reload Article#text,
  # should be done by Article#initialize.
  def reload
    edit_url = "#{@wiki.article_url(full_name, @section)}&action=edit"
    MediaWiki::logger.debug("Loading #{edit_url}")
    parse @wiki.browser.get_content(edit_url)
  end

  ##
  # Extract text, edit token and edit time from an edit page
  #
  # Sets Article#text and Article#read_only. Raises NoEditFormFound
  # if the page has neither an edit form nor a textarea.
  # html:: [String] Edit page document
  def parse(html)
    doc = to_rexml(html)
    # Refreshing @name from the page does not work for MediaWiki 1.4.x
    # and is always the same name you asked for under 1.5.x:
    # @name = doc.elements['//span[@class="editHelp"]/a'].attributes['title']
    form = doc.elements['//form[@name="editform"]']

    if form
      # we got an editable article
      @text = form.elements['textarea[@name="wpTextbox1"]'].text
      begin
        form.each_element('input') do |input|
          case input.attributes['name']
          when 'wpEditToken' then @wp_edittoken = input.attributes['value']
          when 'wpEdittime' then @wp_edittime = input.attributes['value']
          end
        end
        @read_only = false
      rescue NoMethodError
        # wpEditToken might be missing, that's ok
      end
    elsif (textarea = doc.elements['//textarea'])
      # the article is probably locked and you do not have sufficient privileges
      @text = textarea.text
      @read_only = true
    else
      raise NoEditFormFound, "Error while parsing result, no edit form found"
    end
  end

  ##
  # Push the *Submit* button
  #
  # Send the modified Article#text to the MediaWiki. If the answer is
  # again an edit page carrying a different edit token or edit time,
  # the submission is repeated with the new values.
  # summary:: [String] Change summary
  # minor_edit:: [Boolean] This is a Minor Edit
  # watch_this:: [Boolean] Watch this article
  # retries:: [Fixnum] Remaining re-submissions before giving up
  def submit(summary, minor_edit = false, watch_this = false, retries = 10)
    raise "This Article is read-only." if read_only

    submit_url = "#{@wiki.article_url(full_name, @section)}&action=submit"
    MediaWiki::logger.debug("Posting to #{submit_url} with wpEditToken=#{@wp_edittoken} wpEdittime=#{@wp_edittime}")
    data = {
      'wpTextbox1' => @text,
      'wpSummary' => summary,
      'wpSave' => 1,
      'wpEditToken' => @wp_edittoken,
      'wpEdittime' => @wp_edittime
    }
    data['wpMinoredit'] = 1 if minor_edit
    data['wpWatchthis'] = 'on' if watch_this

    begin
      parse @wiki.browser.post_content(submit_url, data)
    rescue NoEditFormFound
      # This means we haven't got the preview page, but the posted article.
      # So everything is ok, but we must reload the edit page here to get
      # a new wpEditToken and wpEdittime.
      reload
      return
    end
    # Any other error from the browser propagates to the caller. The
    # former "rescue Net::HTTPInternalServerError" named a response
    # class that is never raised, so it never caught anything.

    # We received an edit page again; parse has refreshed token and time.
    return if @wp_edittoken.to_s.empty? && @wp_edittime.to_s.empty?
    return if data['wpEditToken'] == @wp_edittoken && data['wpEdittime'] == @wp_edittime

    raise "Re-submit limit reached" unless retries > 0
    submit(summary, minor_edit, watch_this, retries - 1)
  end

  ##
  # Delete this article
  # reason:: [String] Delete reason
  # result:: Whatever the browser's post_content returns
  def delete(reason)
    data = {'wpReason' => reason, 'wpEditToken' => @wp_edittoken, 'wpConfirmB' => 'Delete Page'}
    @wiki.browser.post_content("#{@wiki.article_url(full_name)}&action=delete", data)
  end

  ##
  # Protect this article
  # reason:: [String] Protect reason
  # moves_only:: [Boolean] Send the wpMoveOnly flag along
  # result:: Whatever the browser's post_content returns
  def protect(reason, moves_only = false)
    data = {'wpReasonProtect' => reason, 'wpEditToken' => @wp_edittoken, 'wpConfirmProtectB' => 'Protect Page'}
    data['wpMoveOnly'] = 1 if moves_only
    @wiki.browser.post_content("#{@wiki.article_url(full_name)}&action=protect", data)
  end

  ##
  # Unprotect this article
  # reason:: [String] Unprotect reason
  # result:: Whatever the browser's post_content returns
  def unprotect(reason)
    data = {'wpReasonProtect' => reason, 'wpEditToken' => @wp_edittoken, 'wpConfirmProtectB' => 'Protect Page'}
    @wiki.browser.post_content("#{@wiki.article_url(full_name)}&action=unprotect", data)
  end

  ##
  # "what links here" url for this article
  # count:: [Fixnum] Optional value for the limit parameter
  # result:: [String] URL, with "&limit=count" appended if count is given
  def what_links_here_url(count = nil)
    url = @wiki.article_url("Special:Whatlinkshere/#{full_name}")
    # Always hand back the URL; a trailing modifier-if here would make
    # the method return nil whenever no count is given.
    count ? "#{url}&limit=#{count}" : url
  end

  ##
  # What articles link to this article?
  # count:: [Fixnum] Optional value for the limit parameter
  # result:: [Array] of [String] Article names
  def what_links_here(count = nil)
    page = to_rexml(@wiki.browser.get_content(what_links_here_url(count)))
    titles = []
    page.each_element('//div[@id="bodyContent"]//ul/li/a') { |a| titles << a.attributes['title'] }
    titles
  end

  ##
  # Same question as Article#what_links_here, but answered by scanning
  # the raw page with a regular expression instead of parsing it.
  # count:: [Fixnum] Optional value for the limit parameter
  # result:: [Array] of [String] Article names
  def fast_what_links_here(count = nil)
    content = @wiki.browser.get_content(what_links_here_url(count))
    content.scan(%r{<li><a href=".+?" title="(.+?)">.+?</a>.+?</li>}).flatten.map { |title|
      REXML::Text.unnormalize(title)
    }
  end

  protected

  ##
  # Turn a document into its REXML root element, going through HTree
  # when that library has been loaded and through REXML alone otherwise.
  # html:: [String] Document
  # result:: [REXML::Element] root element
  def to_rexml(html)
    # const_defined? works regardless of whether Module#constants
    # yields Strings or Symbols.
    rexml = if Object.const_defined?(:HTree)
              HTree(html).to_rexml
            else
              REXML::Document.new(html)
            end
    rexml.root
  end

  ##
  # Build a URL from the wiki's base URL and the href of the first link
  # inside the <li> with the given id on the rendered article page.
  # tab_id:: [String] id attribute of the <li> element
  # result:: [String] URL
  def tab_link_url(tab_id)
    uri = @wiki.url.dup
    href = xhtml.elements["//li[@id=\"#{tab_id}\"]//a"].attributes['href']
    uri.path, uri.query = href.split(/\?/, 2)
    uri.to_s
  end
end
258
+
259
+ end
@@ -0,0 +1,54 @@
1
+ =begin
2
+ This file is part of Ruby-MediaWiki.
3
+
4
+ Ruby-MediaWiki is free software: you can redistribute it and/or
5
+ modify it under the terms of the GNU General Public License as
6
+ published by the Free Software Foundation, either version 3 of the
7
+ License, or (at your option) any later version.
8
+
9
+ Ruby-MediaWiki is distributed in the hope that it will be useful,
10
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
+ General Public License for more details.
13
+
14
+ You should have received a copy of the GNU General Public License
15
+ along with Ruby-MediaWiki. If not, see
16
+ <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+ require 'rexml/document'
20
+ require 'mediawiki/article'
21
+
22
+ module MediaWiki
23
+ ##
24
+ # The Category class represents MediaWiki categories.
25
class Category < Article
  ##
  # This returns the full article name prefixed with "Category:"
  # instead of the name, which should not carry a prefix.
  # result:: [String] Full name
  def full_name
    "Category:#{@name}"
  end

  ##
  # Calls the reload function of the super-class (Article#reload)
  # but removes the prefix (namespace) from the name afterwards.
  #
  # Use full_name to obtain the name with namespace.
  def reload
    super
    # Assign a stripped copy instead of using sub!: the name String
    # was handed in by the caller and must not be altered in place.
    @name = @name.sub(/^.+?:/, '')
  end

  ##
  # Which articles belong to this category?
  # result:: [Array] of [String] Article names
  def articles
    titles = []
    xhtml.each_element('//div[@id="bodyContent"]//ul/li/a') { |a| titles << a.attributes['title'] }
    titles
  end
end
54
+ end
@@ -0,0 +1,51 @@
1
+ =begin
2
+ This file is part of Ruby-MediaWiki.
3
+
4
+ Ruby-MediaWiki is free software: you can redistribute it and/or
5
+ modify it under the terms of the GNU General Public License as
6
+ published by the Free Software Foundation, either version 3 of the
7
+ License, or (at your option) any later version.
8
+
9
+ Ruby-MediaWiki is distributed in the hope that it will be useful,
10
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
+ General Public License for more details.
13
+
14
+ You should have received a copy of the GNU General Public License
15
+ along with Ruby-MediaWiki. If not, see
16
+ <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+ require 'yaml'
20
+ require 'mediawiki'
21
+
22
module MediaWiki
  ##
  # dotfile function reads the user's MediaWiki config and
  # creates a Wiki instance.
  #
  # The filename is determined by the environment variable
  # MEDIAWIKI_RC or defaults to ~/.mediawikirc .
  #
  # The wiki to use is looked up in this order: the option
  # mywiki (if the file has an entry of that name), the
  # MEDIAWIKI_WIKI environment variable, and finally the
  # entry named by the file's "default" key.
  #
  # A robot may set [myrealm] to retrieve a second result
  # output: a section with this name in the current wiki's
  # configuration, for configuration of specific robot
  # tasks.
  # myrealm:: [String] Optional key inside the wiki's configuration
  # mywiki:: [String] Optional name of the wiki to use
  # result:: [Wiki], or [Array] of [Wiki] and the realm section if myrealm is given
  #
  # Raises RuntimeError if no configuration entry can be found.
  def MediaWiki.dotfile(myrealm=nil,mywiki=nil)
    filename = ENV['MEDIAWIKI_RC'] || "#{ENV['HOME']}/.mediawikirc"
    # load_file closes the file again; an empty file yields a falsy
    # result, which is treated as "nothing configured".
    dotfile = YAML.load_file(filename) || {}

    fallback = ENV['MEDIAWIKI_WIKI'] || dotfile['default']
    wikiconf = dotfile[mywiki] || dotfile[fallback]
    unless wikiconf
      raise "No configuration for wiki #{(mywiki || fallback).inspect} found in #{filename}"
    end

    wiki = Wiki.new(wikiconf['url'], wikiconf['user'], wikiconf['password'])

    if myrealm
      [wiki, wikiconf[myrealm]]
    else
      wiki
    end
  end
end
@@ -0,0 +1,140 @@
1
+ =begin
2
+ This file is part of Ruby-MediaWiki.
3
+
4
+ Ruby-MediaWiki is free software: you can redistribute it and/or
5
+ modify it under the terms of the GNU General Public License as
6
+ published by the Free Software Foundation, either version 3 of the
7
+ License, or (at your option) any later version.
8
+
9
+ Ruby-MediaWiki is distributed in the hope that it will be useful,
10
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
+ General Public License for more details.
13
+
14
+ You should have received a copy of the GNU General Public License
15
+ along with Ruby-MediaWiki. If not, see
16
+ <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+ require 'net/http'
20
+ require 'net/https'
21
+ require 'cgi'
22
+
23
+ module MediaWiki
24
+ ##
25
+ # The MiniBrowser is used to perform GET and POST requests
26
+ # over HTTP and HTTPS, supporting:
27
+ # * HTTP-Auth encoding in URLs (proto://user:password@host/...)
28
+ # * Cookie support
29
+ # * HTTP Redirection (max. 10 in a row)
30
+ #
31
+ # All interaction with MiniBrowser is normally done by
32
+ # MediaWiki::Wiki.
33
class MiniBrowser
  ##
  # Initialize a MiniBrowser instance
  #
  # No connection is opened here; each request opens its own.
  # url:: [URI::HTTP] or [URI::HTTPS]
  def initialize(url)
    @url = url
    @http = Net::HTTP.new(@url.host, @url.port)
    @http.use_ssl = true if @url.class == URI::HTTPS
    @user_agent = 'WikiBot'
    @cookies = {}
  end

  ##
  # Add cookies to the volatile cookie cache
  #
  # Everything after the first ";" of each entry (the cookie
  # attributes) is dropped. The given strings are not modified.
  # cookies:: [Array] of [String] Set-Cookie values; a single
  #           [String] or nil is accepted as well
  def add_cookie(cookies)
    Array(cookies).each do |cookie|
      # Work on a copy: the strings may belong to a response object
      # or be frozen.
      pair = cookie.to_s.sub(/;.*$/, '')
      if (match = pair.match(/([^=]+)=(.*)/))
        @cookies[match[1]] = match[2]
      end
    end
  end

  ##
  # Get the cookie cache in a serialized form ready for HTTP.
  # result:: [String]
  def cookies
    @cookies.map { |key, value| "#{key}=#{value}" }.join(";")
  end

  ##
  # Perform a GET request
  #
  # This method accepts 10 HTTP redirects at max. Redirects are
  # requested over the connection of this MiniBrowser. A "not found"
  # response is returned like a successful one.
  # url:: [String]
  # result:: [String] Document
  def get_content(url)
    retries = 10

    @http.start do |http|
      loop do
        raise "too many redirects" if retries < 1

        request = Net::HTTP::Get.new(url, request_headers)
        request.basic_auth(@url.user, @url.password) if @url.user
        response = http.request(request)

        case response
        when Net::HTTPSuccess, Net::HTTPNotFound
          # returning from inside the block still lets start close the connection
          return response.body
        when Net::HTTPRedirection
          MediaWiki::logger.debug("Redirecting to #{response['Location']}")
          retries -= 1
          url = response['Location']
        else
          raise "Unknown Response: #{response.inspect}"
        end
      end
    end
  end

  ##
  # Perform a POST request
  #
  # Cookies sent back by the server are added to the cookie cache.
  # Will switch to MiniBrowser#get_content upon HTTP redirect.
  # url:: [String]
  # data:: [Hash] POST data
  # result:: [String] Document
  def post_content(url, data)
    post_data = data.map { |key, value| "#{CGI::escape(key.to_s)}=#{CGI::escape(value.to_s)}" }.join('&')

    response = @http.start do |http|
      request = Net::HTTP::Post.new(url, request_headers)
      request.basic_auth(@url.user, @url.password) if @url.user
      http.request(request, post_data)
    end

    case response
    when Net::HTTPSuccess
      store_cookies(response)
      response.body
    when Net::HTTPRedirection
      MediaWiki::logger.debug("Redirecting to #{response['Location']}")
      store_cookies(response)
      get_content(response['Location'])
    else
      raise "Unknown Response on #{url}: #{response.inspect}"
    end
  end

  private

  ##
  # Headers sent along with every request.
  # result:: [Hash]
  def request_headers
    {
      'Content-Type' => 'application/x-www-form-urlencoded',
      'User-Agent' => @user_agent,
      'Cookie' => cookies
    }
  end

  ##
  # Feed the Set-Cookie headers of a response into the cookie cache.
  # response:: [Net::HTTPResponse]
  def store_cookies(response)
    # get_fields keeps multiple Set-Cookie headers apart; fall back to
    # the joined header value where it is not available.
    fields = if response.respond_to?(:get_fields)
               response.get_fields('Set-Cookie')
             else
               response['Set-Cookie']
             end
    add_cookie(fields) if fields
  end
end
140
+ end