mediawiki-gateway 0.6.2 → 1.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/COPYING +22 -0
  3. data/ChangeLog +16 -0
  4. data/README.md +80 -21
  5. data/Rakefile +28 -34
  6. data/bin/mediawiki-gateway +203 -0
  7. data/lib/media_wiki.rb +4 -9
  8. data/lib/media_wiki/exception.rb +11 -8
  9. data/lib/media_wiki/fake_wiki.rb +636 -0
  10. data/lib/media_wiki/gateway.rb +105 -940
  11. data/lib/media_wiki/gateway/files.rb +173 -0
  12. data/lib/media_wiki/gateway/pages.rb +400 -0
  13. data/lib/media_wiki/gateway/query.rb +98 -0
  14. data/lib/media_wiki/gateway/site.rb +101 -0
  15. data/lib/media_wiki/gateway/users.rb +182 -0
  16. data/lib/media_wiki/utils.rb +47 -13
  17. data/lib/media_wiki/version.rb +27 -0
  18. data/lib/mediawiki-gateway.rb +1 -0
  19. data/spec/{import-test-data.xml → data/import.xml} +0 -0
  20. data/spec/media_wiki/gateway/files_spec.rb +34 -0
  21. data/spec/media_wiki/gateway/pages_spec.rb +390 -0
  22. data/spec/media_wiki/gateway/query_spec.rb +84 -0
  23. data/spec/media_wiki/gateway/site_spec.rb +122 -0
  24. data/spec/media_wiki/gateway/users_spec.rb +171 -0
  25. data/spec/media_wiki/gateway_spec.rb +129 -0
  26. data/spec/{live_gateway_spec.rb → media_wiki/live_gateway_spec.rb} +31 -35
  27. data/spec/{utils_spec.rb → media_wiki/utils_spec.rb} +41 -39
  28. data/spec/spec_helper.rb +17 -16
  29. metadata +77 -135
  30. data/.ruby-version +0 -1
  31. data/.rvmrc +0 -34
  32. data/Gemfile +0 -19
  33. data/Gemfile.lock +0 -77
  34. data/LICENSE +0 -21
  35. data/config/hosts.yml +0 -17
  36. data/lib/media_wiki/config.rb +0 -69
  37. data/mediawiki-gateway.gemspec +0 -113
  38. data/samples/README +0 -18
  39. data/samples/create_page.rb +0 -13
  40. data/samples/delete_batch.rb +0 -14
  41. data/samples/download_batch.rb +0 -15
  42. data/samples/email_user.rb +0 -14
  43. data/samples/export_xml.rb +0 -14
  44. data/samples/get_page.rb +0 -11
  45. data/samples/import_xml.rb +0 -14
  46. data/samples/run_fake_media_wiki.rb +0 -8
  47. data/samples/search_content.rb +0 -12
  48. data/samples/semantic_query.rb +0 -17
  49. data/samples/upload_commons.rb +0 -45
  50. data/samples/upload_file.rb +0 -13
  51. data/spec/fake_media_wiki/api_pages.rb +0 -135
  52. data/spec/fake_media_wiki/app.rb +0 -360
  53. data/spec/fake_media_wiki/query_handling.rb +0 -136
  54. data/spec/gateway_spec.rb +0 -888
@@ -1,18 +1,23 @@
1
- require 'rubygems'
2
1
  require 'logger'
3
2
  require 'rest_client'
4
3
  require 'rexml/document'
5
4
  require 'uri'
6
- require 'active_support'
7
5
 
8
6
  module MediaWiki
9
7
 
10
8
  class Gateway
11
- attr_reader :log
9
+
10
+ USER_AGENT = "#{self}/#{VERSION}"
11
+
12
+ class << self
13
+
14
+ attr_accessor :default_user_agent
15
+
16
+ end
12
17
 
13
18
  # Set up a MediaWiki::Gateway for a given MediaWiki installation
14
19
  #
15
- # [url] Path to API of target MediaWiki (eg. "http://en.wikipedia.org/w/api.php")
20
+ # [url] Path to API of target MediaWiki (eg. 'http://en.wikipedia.org/w/api.php')
16
21
  # [options] Hash of options
17
22
  # [http_options] Hash of options for RestClient::Request (via http_send)
18
23
  #
@@ -25,912 +30,59 @@ module MediaWiki
25
30
  # [:maxlag] Maximum allowed server lag (see http://www.mediawiki.org/wiki/Manual:Maxlag_parameter), defaults to 5 seconds.
26
31
  # [:retry_count] Number of times to try before giving up if MediaWiki returns 503 Service Unavailable, defaults to 3 (original request plus two retries).
27
32
  # [:retry_delay] Seconds to wait before retry if MediaWiki returns 503 Service Unavailable, defaults to 10 seconds.
28
- def initialize(url, options={}, http_options={})
29
- default_options = {
30
- :bot => false,
31
- :limit => 500,
32
- :logdevice => STDERR,
33
- :loglevel => Logger::WARN,
34
- :maxlag => 5,
35
- :retry_count => 3,
36
- :retry_delay => 10,
37
- :max_results => 500
38
- }
39
- @options = default_options.merge(options)
40
- @http_options = http_options
41
- @wiki_url = url
33
+ # [:user_agent] User-Agent header to send with requests, defaults to ::default_user_agent or nil.
34
+ def initialize(url, options = {}, http_options = {})
35
+ @options = {
36
+ bot: false,
37
+ limit: 500,
38
+ logdevice: STDERR,
39
+ loglevel: Logger::WARN,
40
+ max_results: 500,
41
+ maxlag: 5,
42
+ retry_count: 3,
43
+ retry_delay: 10,
44
+ user_agent: self.class.default_user_agent
45
+ }.merge(options)
46
+
42
47
  @log = Logger.new(@options[:logdevice])
43
48
  @log.level = @options[:loglevel]
44
- @headers = { "User-Agent" => "MediaWiki::Gateway/#{MediaWiki::VERSION}", "Accept-Encoding" => "gzip" }
45
- @cookies = {}
46
- end
47
-
48
- attr_reader :base_url, :cookies
49
-
50
- # Login to MediaWiki
51
- #
52
- # [username] Username
53
- # [password] Password
54
- # [domain] Domain for authentication plugin logins (eg. LDAP), optional -- defaults to 'local' if not given
55
- # [options] Hash of additional options
56
- #
57
- # Throws MediaWiki::Unauthorized if login fails
58
- def login(username, password, domain = 'local', options = {})
59
- make_api_request(options.merge(
60
- 'action' => 'login',
61
- 'lgname' => username,
62
- 'lgpassword' => password,
63
- 'lgdomain' => domain
64
- ))
65
-
66
- @password = password
67
- @username = username
68
- end
69
-
70
- # Fetch MediaWiki page in MediaWiki format. Does not follow redirects.
71
- #
72
- # [page_title] Page title to fetch
73
- # [options] Hash of additional options
74
- #
75
- # Returns content of page as string, nil if the page does not exist.
76
- def get(page_title, options = {})
77
- page = make_api_request(options.merge(
78
- 'action' => 'query',
79
- 'prop' => 'revisions',
80
- 'rvprop' => 'content',
81
- 'titles' => page_title
82
- )).first.elements['query/pages/page']
83
-
84
- page.elements['revisions/rev'].text || '' if valid_page?(page)
85
- end
86
-
87
- # Fetch latest revision ID of a MediaWiki page. Does not follow redirects.
88
- #
89
- # [page_title] Page title to fetch
90
- # [options] Hash of additional options
91
- #
92
- # Returns revision ID as a string, nil if the page does not exist.
93
- def revision(page_title, options = {})
94
- page = make_api_request(options.merge(
95
- 'action' => 'query',
96
- 'prop' => 'revisions',
97
- 'rvprop' => 'ids',
98
- 'rvlimit' => 1,
99
- 'titles' => page_title
100
- )).first.elements['query/pages/page']
101
-
102
- page.elements['revisions/rev'].attributes['revid'] if valid_page?(page)
103
- end
104
-
105
- # Render a MediaWiki page as HTML
106
- #
107
- # [page_title] Page title to fetch
108
- # [options] Hash of additional options
109
- #
110
- # Options:
111
- # * [:linkbase] supply a String to prefix all internal (relative) links with. '/wiki/' is assumed to be the base of a relative link
112
- # * [:noeditsections] strips all edit-links if set to +true+
113
- # * [:noimages] strips all +img+ tags from the rendered text if set to +true+
114
- #
115
- # Returns rendered page as string, or nil if the page does not exist
116
- def render(page_title, options = {})
117
- form_data = {'action' => 'parse', 'page' => page_title}
118
49
 
119
- valid_options = %w(linkbase noeditsections noimages)
120
- # Check options
121
- options.keys.each{|opt| raise ArgumentError.new("Unknown option '#{opt}'") unless valid_options.include?(opt.to_s)}
122
-
123
- rendered = nil
124
- parsed = make_api_request(form_data).first.elements["parse"]
125
- if parsed.attributes["revid"] != '0'
126
- rendered = parsed.elements["text"].text.gsub(/<!--(.|\s)*?-->/, '')
127
- # OPTIMIZE: unify the keys in +options+ like symbolize_keys! but w/o
128
- if options["linkbase"] or options[:linkbase]
129
- linkbase = options["linkbase"] || options[:linkbase]
130
- rendered = rendered.gsub(/\shref="\/wiki\/([\w\(\)\-\.%:,]*)"/, ' href="' + linkbase + '/wiki/\1"')
131
- end
132
- if options["noeditsections"] or options[:noeditsections]
133
- rendered = rendered.gsub(/<span class="editsection">\[.+\]<\/span>/, '')
134
- end
135
- if options["noimages"] or options[:noimages]
136
- rendered = rendered.gsub(/<img.*\/>/, '')
137
- end
138
- end
139
- rendered
140
- end
141
-
142
- # Create a new page, or overwrite an existing one
143
- #
144
- # [title] Page title to create or overwrite, string
145
- # [content] Content for the page, string
146
- # [options] Hash of additional options
147
- #
148
- # Options:
149
- # * [:overwrite] Allow overwriting existing pages
150
- # * [:summary] Edit summary for history, string
151
- # * [:token] Use this existing edit token instead of requesting a new one (useful for bulk loads)
152
- # * [:minor] Mark this edit as "minor" if true, mark this edit as "major" if false, leave major/minor status by default if not specified
153
- # * [:notminor] Mark this edit as "major" if true
154
- # * [:bot] Set the bot parameter (see http://www.mediawiki.org/wiki/API:Edit#Parameters). Defaults to false.
155
- def create(title, content, options={})
156
- form_data = {'action' => 'edit', 'title' => title, 'text' => content, 'summary' => (options[:summary] || ""), 'token' => get_token('edit', title)}
157
- if @options[:bot] or options[:bot]
158
- form_data['bot'] = '1'
159
- form_data['assert'] = 'bot'
160
- end
161
- form_data['minor'] = '1' if options[:minor]
162
- form_data['notminor'] = '1' if options[:minor] == false or options[:notminor]
163
- form_data['createonly'] = "" unless options[:overwrite]
164
- form_data['section'] = options[:section].to_s if options[:section]
165
- make_api_request(form_data)
166
- end
167
-
168
- # Edit page
169
- #
170
- # Same options as create, but always overwrites existing pages (and creates them if they don't exist already).
171
- def edit(title, content, options={})
172
- create(title, content, {:overwrite => true}.merge(options))
173
- end
174
-
175
- # Protect/unprotect a page
176
- #
177
- # Arguments:
178
- # * [title] Page title to protect, string
179
- # * [protections] Protections to apply, hash or array of hashes
180
- #
181
- # Protections:
182
- # * [:action] (required) The action to protect, string
183
- # * [:group] (required) The group allowed to perform the action, string
184
- # * [:expiry] The protection expiry as a GNU timestamp, string
185
- #
186
- # * [options] Hash of additional options
187
- #
188
- # Options:
189
- # * [:cascade] Protect pages included in this page, boolean
190
- # * [:reason] Reason for protection, string
191
- #
192
- # Examples:
193
- # 1. mw.protect('Main Page', {:action => 'edit', :group => 'all'}, {:cascade => true})
194
- # 2. prt = [{:action => 'move', :group => 'sysop', :expiry => 'never'},
195
- # {:action => 'edit', :group => 'autoconfirmed', :expiry => 'next Monday 16:04:57'}]
196
- # mw.protect('Main Page', prt, {:reason => 'awesomeness'})
197
- #
198
- def protect(title, protections, options={})
199
- # validate and format protections
200
- protections = [protections] if protections.is_a?(Hash)
201
- raise ArgumentError.new("Invalid type '#{protections.class}' for protections") unless protections.is_a?(Array)
202
- valid_prt_options = %w(action group expiry)
203
- required_prt_options = %w(action group)
204
- p,e = [],[]
205
- protections.each do |prt|
206
- existing_prt_options = []
207
- prt.keys.each do |opt|
208
- if valid_prt_options.include?(opt.to_s)
209
- existing_prt_options.push(opt.to_s)
210
- else
211
- raise ArgumentError.new("Unknown option '#{opt}' for protections")
212
- end
213
- end
214
- required_prt_options.each{|opt| raise ArgumentError.new("Missing required option '#{opt}' for protections") unless existing_prt_options.include?(opt)}
215
- p.push("#{prt[:action]}=#{prt[:group]}")
216
- if prt.has_key?(:expiry)
217
- e.push(prt[:expiry].to_s)
218
- else
219
- e.push('never')
220
- end
221
- end
222
-
223
- # validate options
224
- valid_options = %w(cascade reason)
225
- options.keys.each{|opt| raise ArgumentError.new("Unknown option '#{opt}'") unless valid_options.include?(opt.to_s)}
226
-
227
- # make API request
228
- form_data = {'action' => 'protect', 'title' => title, 'token' => get_token('protect', title)}
229
- form_data['protections'] = p.join('|')
230
- form_data['expiry'] = e.join('|')
231
- form_data['cascade'] = '' if options[:cascade] === true
232
- form_data['reason'] = options[:reason].to_s if options[:reason]
233
- make_api_request(form_data)
234
- end
235
-
236
- # Move a page to a new title
237
- #
238
- # [from] Old page name
239
- # [to] New page name
240
- # [options] Hash of additional options
241
- #
242
- # Options:
243
- # * [:movesubpages] Move associated subpages
244
- # * [:movetalk] Move associated talkpages
245
- # * [:noredirect] Do not create a redirect page from old name. Requires the 'suppressredirect' user right, otherwise MW will silently ignore the option and create the redirect anyway.
246
- # * [:reason] Reason for move
247
- # * [:watch] Add page and any redirect to watchlist
248
- # * [:unwatch] Remove page and any redirect from watchlist
249
- def move(from, to, options={})
250
- valid_options = %w(movesubpages movetalk noredirect reason watch unwatch)
251
- options.keys.each{|opt| raise ArgumentError.new("Unknown option '#{opt}'") unless valid_options.include?(opt.to_s)}
252
-
253
- form_data = options.merge({'action' => 'move', 'from' => from, 'to' => to, 'token' => get_token('move', from)})
254
- make_api_request(form_data)
255
- end
256
-
257
- # Delete one page. (MediaWiki API does not support deleting multiple pages at a time.)
258
- #
259
- # [title] Title of page to delete
260
- # [options] Hash of additional options
261
- def delete(title, options = {})
262
- make_api_request(options.merge(
263
- 'action' => 'delete',
264
- 'title' => title,
265
- 'token' => get_token('delete', title)
266
- ))
267
- end
268
-
269
- # Undelete all revisions of one page.
270
- #
271
- # [title] Title of page to undelete
272
- # [options] Hash of additional options
273
- #
274
- # Returns number of revisions undeleted, or zero if nothing to undelete
275
- def undelete(title, options = {})
276
- if token = get_undelete_token(title)
277
- make_api_request(options.merge(
278
- 'action' => 'undelete',
279
- 'title' => title,
280
- 'token' => token
281
- )).first.elements['undelete'].attributes['revisions'].to_i
282
- else
283
- 0 # No revisions to undelete
284
- end
285
- end
286
-
287
- # Get a list of matching page titles in a namespace
288
- #
289
- # [key] Search key, matched as a prefix (^key.*). May contain or equal a namespace, defaults to main (namespace 0) if none given.
290
- # [options] Optional hash of additional options, eg. { 'apfilterredir' => 'nonredirects' }. See http://www.mediawiki.org/wiki/API:Allpages
291
- #
292
- # Returns array of page titles (empty if no matches)
293
- def list(key, options = {})
294
- key, namespace = key.split(':', 2).reverse
295
- namespace = namespaces_by_prefix[namespace] || 0
296
-
297
- iterate_query('allpages', '//p', 'title', 'apfrom', options.merge(
298
- 'list' => 'allpages',
299
- 'apprefix' => key,
300
- 'apnamespace' => namespace,
301
- 'aplimit' => @options[:limit]
302
- ))
303
- end
304
-
305
- # Get a list of pages that are members of a category
306
- #
307
- # [category] Name of the category
308
- # [options] Optional hash of additional options. See http://www.mediawiki.org/wiki/API:Categorymembers
309
- #
310
- # Returns array of page titles (empty if no matches)
311
- def category_members(category, options = {})
312
- iterate_query('categorymembers', '//cm', 'title', 'cmcontinue', options.merge(
313
- 'cmtitle' => category,
314
- 'cmlimit' => @options[:limit]
315
- ))
316
- end
317
-
318
- # Get a list of pages that link to a target page
319
- #
320
- # [title] Link target page
321
- # [filter] "all" links (default), "redirects" only, or "nonredirects" (plain links only)
322
- # [options] Hash of additional options
323
- #
324
- # Returns array of page titles (empty if no matches)
325
- def backlinks(title, filter = 'all', options = {})
326
- iterate_query('backlinks', '//bl', 'title', 'blcontinue', options.merge(
327
- 'bltitle' => title,
328
- 'blfilterredir' => filter,
329
- 'bllimit' => @options[:limit]
330
- ))
331
- end
332
-
333
- # Get a list of pages with matching content in given namespaces
334
- #
335
- # [key] Search key
336
- # [namespaces] Array of namespace names to search (defaults to main only)
337
- # [limit] Maximum number of hits to ask for (defaults to 500; note that Wikimedia Foundation wikis allow only 50 for normal users)
338
- # [max_results] Maximum total number of results to return
339
- # [options] Hash of additional options
340
- #
341
- # Returns array of page titles (empty if no matches)
342
- def search(key, namespaces = nil, limit = @options[:limit], max_results = @options[:max_results], options = {})
343
- titles = []
344
- offset = 0
345
-
346
- form_data = options.merge(
347
- 'action' => 'query',
348
- 'list' => 'search',
349
- 'srwhat' => 'text',
350
- 'srsearch' => key,
351
- 'srlimit' => limit
352
- )
353
-
354
- if namespaces
355
- namespaces = [ namespaces ] unless namespaces.kind_of? Array
356
- form_data['srnamespace'] = namespaces.map! do |ns| namespaces_by_prefix[ns] end.join('|')
357
- end
358
-
359
- begin
360
- form_data['sroffset'] = offset if offset
361
- form_data['srlimit'] = [limit, max_results - offset.to_i].min
362
- res, offset = make_api_request(form_data, '//query-continue/search/@sroffset')
363
- titles += REXML::XPath.match(res, "//p").map { |x| x.attributes["title"] }
364
- end while offset && offset.to_i < max_results.to_i
365
-
366
- titles
367
- end
368
-
369
- # Get a list of users
370
- #
371
- # [options] Optional hash of options, eg. { 'augroup' => 'sysop' }. See http://www.mediawiki.org/wiki/API:Allusers
372
- #
373
- # Returns array of user names (empty if no matches)
374
- def users(options = {})
375
- iterate_query('allusers', '//u', 'name', 'aufrom', options.merge(
376
- 'aulimit' => @options[:limit]
377
- ))
378
- end
379
-
380
- # Get user contributions
381
- #
382
- # user: The user name
383
- # count: Maximum number of contributions to retrieve, or nil for all
384
- # [options] Optional hash of options, eg. { 'ucnamespace' => 4 }. See http://www.mediawiki.org/wiki/API:Usercontribs
385
- #
386
- # Returns array of hashes containing the "item" attributes defined here: http://www.mediawiki.org/wiki/API:Usercontribs
387
- def contributions(user, count = nil, options = {})
388
- result = []
389
-
390
- iterate_query('usercontribs', '//item', nil, 'uccontinue', options.merge(
391
- 'ucuser' => user,
392
- 'uclimit' => @options[:limit]
393
- )) { |element|
394
- result << hash = {}
395
- element.attributes.each { |key, value| hash[key] = value }
50
+ @http_options, @wiki_url, @cookies, @headers = http_options, url, {}, {
51
+ 'User-Agent' => [@options[:user_agent], USER_AGENT].compact.join(' '),
52
+ 'Accept-Encoding' => 'gzip'
396
53
  }
397
-
398
- count ? result.take(count) : result
399
54
  end
400
55
 
401
- # Upload a file, or get the status of pending uploads. Several
402
- # methods are available:
403
- #
404
- # * Upload file contents directly.
405
- # * Have the MediaWiki server fetch a file from a URL, using the
406
- # "url" parameter
407
- #
408
- # Requires Mediawiki 1.16+
409
- #
410
- # Arguments:
411
- # * [path] Path to file to upload. Set to nil if uploading from URL.
412
- # * [options] Hash of additional options
413
- #
414
- # Note that queries using session keys must be done in the same login
415
- # session as the query that originally returned the key (i.e. do not
416
- # log out and then log back in).
417
- #
418
- # Options:
419
- # * 'filename' - Target filename (defaults to local name if not given), options[:target] is alias for this.
420
- # * 'comment' - Upload comment. Also used as the initial page text for new files if "text" is not specified.
421
- # * 'text' - Initial page text for new files
422
- # * 'watch' - Watch the page
423
- # * 'ignorewarnings' - Ignore any warnings
424
- # * 'url' - Url to fetch the file from. Set path to nil if you want to use this.
425
- #
426
- # Deprecated but still supported options:
427
- # * :description - Description of this file. Used as 'text'.
428
- # * :target - Target filename, same as 'filename'.
429
- # * :summary - Edit summary for history. Used as 'comment'. Also used as 'text' if neither it or :description is specified.
430
- #
431
- # Examples:
432
- # mw.upload('/path/to/local/file.jpg', 'filename' => "RemoteFile.jpg")
433
- # mw.upload(nil, 'filename' => "RemoteFile2.jpg", 'url' => 'http://remote.com/server/file.jpg')
434
- #
435
- def upload(path, options={})
436
- if options[:description]
437
- options['text'] = options[:description]
438
- options.delete(:description)
439
- end
440
-
441
- if options[:target]
442
- options['filename'] = options[:target]
443
- options.delete(:target)
444
- end
445
-
446
- if options[:summary]
447
- options['text'] ||= options[:summary]
448
- options['comment'] = options[:summary]
449
- options.delete(:summary)
450
- end
451
-
452
- options['comment'] ||= "Uploaded by MediaWiki::Gateway"
453
- options['file'] = File.new(path) if path
454
- full_name = path || options['url']
455
- options['filename'] ||= File.basename(full_name) if full_name
456
-
457
- raise ArgumentError.new(
458
- "One of the 'file', 'url' or 'sessionkey' options must be specified!"
459
- ) unless options['file'] || options['url'] || options['sessionkey']
460
-
461
- form_data = options.merge(
462
- 'action' => 'upload',
463
- 'token' => get_token('edit', options['filename'])
464
- )
465
-
466
- make_api_request(form_data)
467
- end
468
-
469
- # Checks if page is a redirect.
470
- #
471
- # [page_title] Page title to fetch
472
- #
473
- # Returns true if the page is a redirect, false if it is not or the page does not exist.
474
- def redirect?(page_title)
475
- form_data = {'action' => 'query', 'prop' => 'info', 'titles' => page_title}
476
- page = make_api_request(form_data).first.elements["query/pages/page"]
477
- !!(valid_page?(page) and page.attributes["redirect"])
478
- end
479
-
480
- # Get image list for given article[s]. Follows redirects.
481
- #
482
- # _article_or_pageid_ is the title or pageid of a single article
483
- # _imlimit_ is the maximum number of images to return (defaults to 200)
484
- # _options_ is the hash of additional options
485
- #
486
- # Example:
487
- # images = mw.images('Gaborone')
488
- # _images_ would contain ['File:Gaborone at night.jpg', 'File:Gaborone2.png', ...]
489
- def images(article_or_pageid, imlimit = 200, options = {})
490
- form_data = options.merge(
491
- 'action' => 'query',
492
- 'prop' => 'images',
493
- 'imlimit' => imlimit,
494
- 'redirects' => true
495
- )
496
-
497
- case article_or_pageid
498
- when Fixnum
499
- form_data['pageids'] = article_or_pageid
500
- else
501
- form_data['titles'] = article_or_pageid
502
- end
503
- xml, _ = make_api_request(form_data)
504
- page = xml.elements["query/pages/page"]
505
- if valid_page? page
506
- if xml.elements["query/redirects/r"]
507
- # We're dealing with redirect here.
508
- images(page.attributes["pageid"].to_i, imlimit)
509
- else
510
- REXML::XPath.match(page, "images/im").map { |x| x.attributes["title"] }
511
- end
512
- else
513
- nil
514
- end
515
- end
516
-
517
- # Get list of interlanguage links for given article[s]. Follows redirects. Returns a hash like { 'id' => 'Yerusalem', 'en' => 'Jerusalem', ... }
518
- #
519
- # _article_or_pageid_ is the title or pageid of a single article
520
- # _lllimit_ is the maximum number of langlinks to return (defaults to 500, the maximum)
521
- # _options_ is the hash of additional options
522
- #
523
- # Example:
524
- # langlinks = mw.langlinks('Jerusalem')
525
- def langlinks(article_or_pageid, lllimit = 500, options = {})
526
- form_data = options.merge(
527
- 'action' => 'query',
528
- 'prop' => 'langlinks',
529
- 'lllimit' => lllimit,
530
- 'redirects' => true
531
- )
56
+ attr_reader :log, :wiki_url, :cookies, :headers
532
57
 
533
- case article_or_pageid
534
- when Fixnum
535
- form_data['pageids'] = article_or_pageid
536
- else
537
- form_data['titles'] = article_or_pageid
538
- end
539
- xml, _ = make_api_request(form_data)
540
- page = xml.elements["query/pages/page"]
541
- if valid_page? page
542
- if xml.elements["query/redirects/r"]
543
- # We're dealing with the redirect here.
544
- langlinks(page.attributes["pageid"].to_i, lllimit)
545
- else
546
- langl = REXML::XPath.match(page, 'langlinks/ll')
547
- if langl.nil?
548
- nil
549
- else
550
- links = {}
551
- langl.each{ |ll| links[ll.attributes["lang"]] = ll.children[0].to_s }
552
- return links
553
- end
554
- end
555
- else
556
- nil
557
- end
558
- end
559
-
560
- # Convenience wrapper for _langlinks_ returning the title in language _lang_ (ISO code) for a given article or pageid, if it exists, via the interlanguage link
561
- #
562
- # Example:
563
- #
564
- # langlink = mw.langlink_for_lang('Tycho Brahe', 'de')
565
- def langlink_for_lang(article_or_pageid, lang)
566
- return langlinks(article_or_pageid)[lang]
567
- end
568
-
569
- # Requests image info from MediaWiki. Follows redirects.
570
- #
571
- # _file_name_or_page_id_ should be either:
572
- # * a file name (String) you want info about without File: prefix.
573
- # * or the Fixnum page id of the file.
574
- #
575
- # _options_ is +Hash+ passed as query arguments. See
576
- # http://www.mediawiki.org/wiki/API:Query_-_Properties#imageinfo_.2F_ii
577
- # for more information.
578
- #
579
- # options['iiprop'] should be either a string of properties joined by
580
- # '|' or an +Array+ (or more precisely something that responds to #join).
581
- #
582
- # +Hash+ like object is returned where keys are image properties.
583
- #
584
- # Example:
585
- # mw.image_info(
586
- # "Trooper.jpg", 'iiprop' => ['timestamp', 'user']
587
- # ).each do |key, value|
588
- # puts "#{key.inspect} => #{value.inspect}"
589
- # end
590
- #
591
- # Output:
592
- # "timestamp" => "2009-10-31T12:59:11Z"
593
- # "user" => "Valdas"
594
- #
595
- def image_info(file_name_or_page_id, options={})
596
- options['iiprop'] = options['iiprop'].join('|') \
597
- if options['iiprop'].respond_to?(:join)
598
- form_data = options.merge(
599
- 'action' => 'query',
600
- 'prop' => 'imageinfo',
601
- 'redirects' => true
602
- )
603
-
604
- case file_name_or_page_id
605
- when Fixnum
606
- form_data['pageids'] = file_name_or_page_id
607
- else
608
- form_data['titles'] = "File:#{file_name_or_page_id}"
609
- end
610
-
611
- xml, _ = make_api_request(form_data)
612
- page = xml.elements["query/pages/page"]
613
- if valid_page? page
614
- if xml.elements["query/redirects/r"]
615
- # We're dealing with redirect here.
616
- image_info(page.attributes["pageid"].to_i, options)
617
- else
618
- page.elements["imageinfo/ii"].attributes
619
- end
620
- else
621
- nil
622
- end
623
- end
624
-
625
- # Download _file_name_ (without "File:" or "Image:" prefix). Returns file contents. All options are passed to
626
- # #image_info however options['iiprop'] is forced to url. You can still
627
- # set other options to control what file you want to download.
628
- def download(file_name, options={})
629
- options['iiprop'] = 'url'
630
-
631
- attributes = image_info(file_name, options)
632
- if attributes
633
- RestClient.get attributes['url']
634
- else
635
- nil
636
- end
637
- end
638
-
639
- # Imports a MediaWiki XML dump
640
- #
641
- # [xml] String or array of page names to fetch
642
- # [options] Hash of additional options
643
- #
644
- # Returns XML array <api><import><page/><page/>...
645
- # <page revisions="1"> (or more) means successfully imported
646
- # <page revisions="0"> means duplicate, not imported
647
- def import(xmlfile, options = {})
648
- make_api_request(options.merge(
649
- 'action' => 'import',
650
- 'xml' => File.new(xmlfile),
651
- 'token' => get_token('import', 'Main Page'), # NB: dummy page name
652
- 'format' => 'xml'
653
- ))
654
- end
655
-
656
- # Exports a page or set of pages
657
- #
658
- # [page_titles] String or array of page titles to fetch
659
- # [options] Hash of additional options
660
- #
661
- # Returns MediaWiki XML dump
662
- def export(page_titles, options = {})
663
- make_api_request(options.merge(
664
- 'action' => 'query',
665
- 'titles' => Array(page_titles).join('|'),
666
- 'export' => nil,
667
- 'exportnowrap' => nil
668
- )).first
669
- end
670
-
671
- # Get the wiki's siteinfo as a hash. See http://www.mediawiki.org/wiki/API:Siteinfo.
672
- #
673
- # [options] Hash of additional options
674
- def siteinfo(options = {})
675
- res = make_api_request(options.merge(
676
- 'action' => 'query',
677
- 'meta' => 'siteinfo'
678
- )).first
679
-
680
- REXML::XPath.first(res, '//query/general')
681
- .attributes.each_with_object({}) { |(k, v), h| h[k] = v }
682
- end
683
-
684
- # Get the wiki's MediaWiki version.
685
- #
686
- # [options] Hash of additional options passed to #siteinfo
687
- def version(options = {})
688
- siteinfo(options).fetch('generator', '').split.last
689
- end
690
-
691
- # Get a list of all known namespaces
692
- #
693
- # [options] Hash of additional options
694
- #
695
- # Returns array of namespaces (name => id)
696
- def namespaces_by_prefix(options = {})
697
- res = make_api_request(options.merge(
698
- 'action' => 'query',
699
- 'meta' => 'siteinfo',
700
- 'siprop' => 'namespaces'
701
- )).first
702
-
703
- REXML::XPath.match(res, "//ns").inject(Hash.new) do |namespaces, namespace|
704
- prefix = namespace.attributes["canonical"] || ""
705
- namespaces[prefix] = namespace.attributes["id"].to_i
706
- namespaces
707
- end
708
- end
709
-
710
- # Get a list of all installed (and registered) extensions
711
- #
712
- # [options] Hash of additional options
713
- #
714
- # Returns array of extensions (name => version)
715
- def extensions(options = {})
716
- res = make_api_request(options.merge(
717
- 'action' => 'query',
718
- 'meta' => 'siteinfo',
719
- 'siprop' => 'extensions'
720
- )).first
721
-
722
- REXML::XPath.match(res, "//ext").inject(Hash.new) do |extensions, extension|
723
- name = extension.attributes["name"] || ""
724
- extensions[name] = extension.attributes["version"]
725
- extensions
726
- end
727
- end
728
-
729
- # Sends e-mail to a user
730
- #
731
- # [user] Username to send mail to (name only: eg. 'Bob', not 'User:Bob')
732
- # [subject] Subject of message
733
- # [content] Content of message
734
- # [options] Hash of additional options
735
- #
736
- # Will raise a 'noemail' APIError if the target user does not have a confirmed email address, see http://www.mediawiki.org/wiki/API:E-mail for details.
737
- def email_user(user, subject, text, options = {})
738
- res = make_api_request(options.merge(
739
- 'action' => 'emailuser',
740
- 'target' => user,
741
- 'subject' => subject,
742
- 'text' => text,
743
- 'token' => get_token('email', "User:#{user}")
744
- )).first
745
-
746
- res.elements['emailuser'].attributes['result'] == 'Success'
747
- end
748
-
749
- # Execute Semantic Mediawiki query
750
- #
751
- # [query] Semantic Mediawiki query
752
- # [params] Array of additional parameters or options, eg. mainlabel=Foo or ?Place (optional)
753
- # [options] Hash of additional options
754
- #
755
- # Returns result as an HTML string
756
- def semantic_query(query, params = [], options = {})
757
- unless smw_version = extensions['Semantic MediaWiki']
758
- raise MediaWiki::Exception, 'Semantic MediaWiki extension not installed.'
759
- end
760
-
761
- if smw_version.to_f >= 1.7
762
- make_api_request(options.merge(
763
- 'action' => 'ask',
764
- 'query' => "#{query}|#{params.join('|')}"
765
- )).first
766
- else
767
- make_api_request(options.merge(
768
- 'action' => 'parse',
769
- 'prop' => 'text',
770
- 'text' => "{{#ask:#{query}|#{params.push('format=list').join('|')}}}"
771
- )).first.elements['parse/text'].text
772
- end
773
- end
774
-
775
- # Create a new account
776
- #
777
- # [options] is +Hash+ passed as query arguments. See https://www.mediawiki.org/wiki/API:Account_creation#Parameters for more information.
778
- def create_account(options)
779
- make_api_request(options.merge('action' => 'createaccount')).first
780
- end
781
-
782
- # Sets options for currenlty logged in user
783
- #
784
- # [changes] a +Hash+ that will be transformed into an equal sign and pipe-separated key value parameter
785
- # [optionname] a +String+ indicating which option to change (optional)
786
- # [optionvalue] the new value for optionname - allows pipe characters (optional)
787
- # [reset] a +Boolean+ indicating if all preferences should be reset to site defaults (optional)
788
- # [options] Hash of additional options
789
- def options(changes = {}, optionname = nil, optionvalue = nil, reset = false, options = {})
790
- form_data = options.merge(
791
- 'action' => 'options',
792
- 'token' => get_options_token
793
- )
794
-
795
- if changes.present?
796
- form_data['change'] = changes.map { |key, value| "#{key}=#{value}" }.join('|')
797
- end
798
-
799
- if optionname.present?
800
- form_data[optionname] = optionvalue
801
- end
802
-
803
- if reset
804
- form_data['reset'] = true
805
- end
806
-
807
- make_api_request(form_data).first
808
- end
809
-
810
- # Set groups for a user
58
+ # Make generic request to API
811
59
  #
812
- # [user] Username of user to modify
813
- # [groups_to_add] Groups to add user to, as an array or a string if a single group (optional)
814
- # [groups_to_remove] Groups to remove user from, as an array or a string if a single group (optional)
815
- # [options] Hash of additional options
816
- def set_groups(user, groups_to_add = [], groups_to_remove = [], comment = '', options = {})
817
- token = get_userrights_token(user)
818
- userrights(user, token, groups_to_add, groups_to_remove, comment, options)
819
- end
820
-
821
- # Review current revision of an article (requires FlaggedRevisions extension, see http://www.mediawiki.org/wiki/Extension:FlaggedRevs)
60
+ # [form_data] hash of attributes to post
61
+ # [continue_xpath] XPath selector for query continue parameter
822
62
  #
823
- # [title] Title of article to review
824
- # [flags] Hash of flags and values to set, eg. { "accuracy" => "1", "depth" => "2" }
825
- # [comment] Comment to add to review (optional)
826
- # [options] Hash of additional options
827
- def review(title, flags, comment = "Reviewed by MediaWiki::Gateway", options = {})
828
- raise APIError.new('missingtitle', "Article #{title} not found") unless revid = revision(title)
829
-
830
- form_data = options.merge(
831
- 'action' => 'review',
832
- 'revid' => revid,
833
- 'token' => get_token('edit', title),
834
- 'comment' => comment
835
- )
836
-
837
- flags.each { |k, v| form_data["flag_#{k}"] = v }
838
-
839
- make_api_request(form_data).first
63
+ # Returns XML document
64
+ def send_request(form_data, continue_xpath = nil)
65
+ make_api_request(form_data, continue_xpath).first
840
66
  end
841
67
 
842
68
  private
843
69
 
844
70
  # Fetch token (type 'delete', 'edit', 'email', 'import', 'move', 'protect')
845
71
  def get_token(type, page_titles)
846
- form_data = {'action' => 'query', 'prop' => 'info', 'intoken' => type, 'titles' => page_titles}
847
- res, _ = make_api_request(form_data)
848
- token = res.elements["query/pages/page"].attributes[type + "token"]
849
- raise Unauthorized.new "User is not permitted to perform this operation: #{type}" if token.nil?
850
- token
851
- end
72
+ res = send_request(
73
+ 'action' => 'query',
74
+ 'prop' => 'info',
75
+ 'intoken' => type,
76
+ 'titles' => page_titles
77
+ )
852
78
 
853
- def get_undelete_token(page_titles)
854
- form_data = {'action' => 'query', 'list' => 'deletedrevs', 'prop' => 'info', 'drprop' => 'token', 'titles' => page_titles}
855
- res, _ = make_api_request(form_data)
856
- if res.elements["query/deletedrevs/page"]
857
- token = res.elements["query/deletedrevs/page"].attributes["token"]
858
- raise Unauthorized.new "User is not permitted to perform this operation: #{type}" if token.nil?
859
- token
860
- else
861
- nil
862
- end
863
- end
864
-
865
- # User rights management (aka group assignment)
866
- def get_userrights_token(user)
867
- form_data = {'action' => 'query', 'list' => 'users', 'ustoken' => 'userrights', 'ususers' => user}
868
- res, _ = make_api_request(form_data)
869
- token = res.elements["query/users/user"].attributes["userrightstoken"]
870
-
871
- @log.debug("RESPONSE: #{res.to_s}")
872
- if token.nil?
873
- if res.elements["query/users/user"].attributes["missing"]
874
- raise APIError.new('invaliduser', "User '#{user}' was not found (get_userrights_token)")
875
- else
876
- raise Unauthorized.new "User '#{@username}' is not permitted to perform this operation: get_userrights_token"
877
- end
79
+ unless token = res.elements['query/pages/page'].attributes[type + 'token']
80
+ raise Unauthorized.new "User is not permitted to perform this operation: #{type}"
878
81
  end
879
82
 
880
83
  token
881
84
  end
882
85
 
883
- def get_options_token
884
- form_data = { 'action' => 'tokens', 'type' => 'options' }
885
- res, _ = make_api_request(form_data)
886
- res.elements['tokens'].attributes['optionstoken']
887
- end
888
-
889
- def userrights(user, token, groups_to_add, groups_to_remove, reason, options = {})
890
- # groups_to_add and groups_to_remove can be a string or an array. Turn them into MediaWiki's pipe-delimited list format.
891
- if groups_to_add.is_a? Array
892
- groups_to_add = groups_to_add.join('|')
893
- end
894
-
895
- if groups_to_remove.is_a? Array
896
- groups_to_remove = groups_to_remove.join('|')
897
- end
898
-
899
- make_api_request(options.merge(
900
- 'action' => 'userrights',
901
- 'user' => user,
902
- 'token' => token,
903
- 'add' => groups_to_add,
904
- 'remove' => groups_to_remove,
905
- 'reason' => reason
906
- )).first
907
- end
908
-
909
-
910
- # Make a custom query
911
- #
912
- # [options] query options
913
- #
914
- # Returns the REXML::Element object as result
915
- #
916
- # Example:
917
- # def creation_time(pagename)
918
- # res = bot.custom_query(:prop => :revisions,
919
- # :titles => pagename,
920
- # :rvprop => :timestamp,
921
- # :rvdir => :newer,
922
- # :rvlimit => 1)
923
- # timestr = res.get_elements('*/*/*/rev')[0].attribute('timestamp').to_s
924
- # time.parse(timestr)
925
- # end
926
- #
927
- def custom_query(options)
928
- form_data = {}
929
- options.each {|k,v| form_data[k.to_s] = v.to_s }
930
- form_data['action'] = 'query'
931
- make_api_request(form_data).first.elements['query']
932
- end
933
-
934
86
  # Iterate over query results
935
87
  #
936
88
  # [list] list name to query
@@ -967,37 +119,39 @@ module MediaWiki
967
119
 
968
120
  # Make generic request to API
969
121
  #
970
- # [form_data] hash or string of attributes to post
122
+ # [form_data] hash of attributes to post
971
123
  # [continue_xpath] XPath selector for query continue parameter
972
124
  # [retry_count] Counter for retries
973
125
  #
974
- # Returns XML document
975
- def make_api_request(form_data, continue_xpath=nil, retry_count=1)
976
- if form_data.kind_of? Hash
977
- form_data['format'] = 'xml'
978
- form_data['maxlag'] = @options[:maxlag]
979
- end
980
- http_send(@wiki_url, form_data, @headers.merge({:cookies => @cookies})) do |response, &block|
981
- if response.code == 503 and retry_count < @options[:retry_count]
126
+ # Returns array of XML document and query continue parameter.
127
+ def make_api_request(form_data, continue_xpath = nil, retry_count = 1)
128
+ form_data.update('format' => 'xml', 'maxlag' => @options[:maxlag])
129
+
130
+ http_send(@wiki_url, form_data, @headers.merge(cookies: @cookies)) { |response, &block|
131
+ if response.code == 503 && retry_count < @options[:retry_count]
982
132
  log.warn("503 Service Unavailable: #{response.body}. Retry in #{@options[:retry_delay]} seconds.")
983
- sleep @options[:retry_delay]
133
+ sleep(@options[:retry_delay])
984
134
  make_api_request(form_data, continue_xpath, retry_count + 1)
985
135
  end
136
+
986
137
  # Check response for errors and return XML
987
- raise MediaWiki::Exception.new "Bad response: #{response}" unless response.code >= 200 and response.code < 300
138
+ unless response.code >= 200 && response.code < 300
139
+ raise MediaWiki::Exception.new("Bad response: #{response}")
140
+ end
141
+
988
142
  doc = get_response(response.dup)
989
- action = form_data['action']
990
143
 
991
144
  # login and createaccount actions require a second request with a token received on the first request
992
- if %w(login createaccount).include?(action)
145
+ if %w[login createaccount].include?(action = form_data['action'])
993
146
  action_result = doc.elements[action].attributes['result']
994
- @cookies.merge!(response.cookies)
147
+ @cookies.update(response.cookies)
995
148
 
996
149
  case action_result.downcase
997
- when "success" then
150
+ when 'success'
998
151
  return [doc, false]
999
- when "needtoken"
1000
- token = doc.elements[action].attributes["token"]
152
+ when 'needtoken'
153
+ token = doc.elements[action].attributes['token']
154
+
1001
155
  if action == 'login'
1002
156
  return make_api_request(form_data.merge('lgtoken' => token))
1003
157
  elsif action == 'createaccount'
@@ -1011,23 +165,21 @@ module MediaWiki
1011
165
  end
1012
166
  end
1013
167
  end
1014
- continue = (continue_xpath and doc.elements['query-continue']) ? REXML::XPath.first(doc, continue_xpath) : nil
1015
- return [doc, continue]
1016
- end
168
+
169
+ return [doc, (continue_xpath && doc.elements['query-continue']) ?
170
+ REXML::XPath.first(doc, continue_xpath) : nil]
171
+ }
1017
172
  end
1018
173
 
1019
174
  # Execute the HTTP request using either GET or POST as appropriate
1020
175
  def http_send url, form_data, headers, &block
1021
- opts = @http_options.merge(:url => url, :headers => headers)
176
+ opts = @http_options.merge(url: url, headers: headers)
177
+ opts[:method] = form_data['action'] == 'query' ? :get : :post
178
+ opts[:method] == :get ? headers[:params] = form_data : opts[:payload] = form_data
1022
179
 
1023
- if form_data['action'] == 'query'
1024
- log.debug("GET: #{form_data.inspect}, #{@cookies.inspect}")
1025
- headers[:params] = form_data
1026
- RestClient::Request.execute(opts.update(:method => :get), &block)
1027
- else
1028
- log.debug("POST: #{form_data.inspect}, #{@cookies.inspect}")
1029
- RestClient::Request.execute(opts.update(:method => :post, :payload => form_data), &block)
1030
- end
180
+ log.debug("#{opts[:method].upcase}: #{form_data.inspect}, #{@cookies.inspect}")
181
+
182
+ RestClient::Request.execute(opts, &block)
1031
183
  end
1032
184
 
1033
185
  # Get API XML response
@@ -1035,41 +187,54 @@ module MediaWiki
1035
187
  # Otherwise return XML root
1036
188
  def get_response(res)
1037
189
  begin
1038
- res = res.force_encoding("UTF-8") if res.respond_to?(:force_encoding)
190
+ res = res.force_encoding('UTF-8') if res.respond_to?(:force_encoding)
1039
191
  doc = REXML::Document.new(res).root
1040
192
  rescue REXML::ParseException
1041
- raise MediaWiki::Exception.new "Response is not XML. Are you sure you are pointing to api.php?"
193
+ raise MediaWiki::Exception.new('Response is not XML. Are you sure you are pointing to api.php?')
1042
194
  end
195
+
1043
196
  log.debug("RES: #{doc}")
1044
- raise MediaWiki::Exception.new "Response does not contain Mediawiki API XML: #{res}" unless [ "api", "mediawiki" ].include? doc.name
1045
- if doc.elements["error"]
1046
- code = doc.elements["error"].attributes["code"]
1047
- info = doc.elements["error"].attributes["info"]
1048
- raise APIError.new(code, info)
197
+
198
+ unless %w[api mediawiki].include?(doc.name)
199
+ raise MediaWiki::Exception.new("Response does not contain Mediawiki API XML: #{res}")
1049
200
  end
1050
- if doc.elements["warnings"]
1051
- warning("API warning: #{doc.elements["warnings"].children.map {|e| e.text}.join(", ")}")
201
+
202
+ if error = doc.elements['error']
203
+ raise APIError.new(*error.attributes.values_at(*%w[code info]))
1052
204
  end
205
+
206
+ if warnings = doc.elements['warnings']
207
+ warning("API warning: #{warnings.children.map(&:text).join(', ')}")
208
+ end
209
+
1053
210
  doc
1054
211
  end
1055
212
 
213
+ def validate_options(options, valid_options)
214
+ options.each_key { |opt|
215
+ unless valid_options.include?(opt.to_s)
216
+ raise ArgumentError, "Unknown option '#{opt}'", caller(1)
217
+ end
218
+ }
219
+ end
220
+
1056
221
  def valid_page?(page)
1057
- return false unless page
1058
- return false if page.attributes["missing"]
1059
- if page.attributes["invalid"]
1060
- warning("Invalid title '#{page.attributes["title"]}'")
1061
- else
1062
- true
1063
- end
222
+ page && !page.attributes['missing'] && (!page.attributes['invalid'] ||
223
+ warning("Invalid title '#{page.attributes['title']}'"))
1064
224
  end
1065
225
 
1066
226
  def warning(msg)
1067
- if @options[:ignorewarnings]
1068
- log.warn(msg)
1069
- return false
1070
- else
1071
- raise APIError.new('warning', msg)
1072
- end
227
+ raise APIError.new('warning', msg) unless @options[:ignorewarnings]
228
+ log.warn(msg)
229
+ false
1073
230
  end
231
+
1074
232
  end
233
+
1075
234
  end
235
+
236
+ require_relative 'gateway/files'
237
+ require_relative 'gateway/pages'
238
+ require_relative 'gateway/query'
239
+ require_relative 'gateway/site'
240
+ require_relative 'gateway/users'