mediawiki-gateway 0.6.2 → 1.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/COPYING +22 -0
  3. data/ChangeLog +16 -0
  4. data/README.md +80 -21
  5. data/Rakefile +28 -34
  6. data/bin/mediawiki-gateway +203 -0
  7. data/lib/media_wiki.rb +4 -9
  8. data/lib/media_wiki/exception.rb +11 -8
  9. data/lib/media_wiki/fake_wiki.rb +636 -0
  10. data/lib/media_wiki/gateway.rb +105 -940
  11. data/lib/media_wiki/gateway/files.rb +173 -0
  12. data/lib/media_wiki/gateway/pages.rb +400 -0
  13. data/lib/media_wiki/gateway/query.rb +98 -0
  14. data/lib/media_wiki/gateway/site.rb +101 -0
  15. data/lib/media_wiki/gateway/users.rb +182 -0
  16. data/lib/media_wiki/utils.rb +47 -13
  17. data/lib/media_wiki/version.rb +27 -0
  18. data/lib/mediawiki-gateway.rb +1 -0
  19. data/spec/{import-test-data.xml → data/import.xml} +0 -0
  20. data/spec/media_wiki/gateway/files_spec.rb +34 -0
  21. data/spec/media_wiki/gateway/pages_spec.rb +390 -0
  22. data/spec/media_wiki/gateway/query_spec.rb +84 -0
  23. data/spec/media_wiki/gateway/site_spec.rb +122 -0
  24. data/spec/media_wiki/gateway/users_spec.rb +171 -0
  25. data/spec/media_wiki/gateway_spec.rb +129 -0
  26. data/spec/{live_gateway_spec.rb → media_wiki/live_gateway_spec.rb} +31 -35
  27. data/spec/{utils_spec.rb → media_wiki/utils_spec.rb} +41 -39
  28. data/spec/spec_helper.rb +17 -16
  29. metadata +77 -135
  30. data/.ruby-version +0 -1
  31. data/.rvmrc +0 -34
  32. data/Gemfile +0 -19
  33. data/Gemfile.lock +0 -77
  34. data/LICENSE +0 -21
  35. data/config/hosts.yml +0 -17
  36. data/lib/media_wiki/config.rb +0 -69
  37. data/mediawiki-gateway.gemspec +0 -113
  38. data/samples/README +0 -18
  39. data/samples/create_page.rb +0 -13
  40. data/samples/delete_batch.rb +0 -14
  41. data/samples/download_batch.rb +0 -15
  42. data/samples/email_user.rb +0 -14
  43. data/samples/export_xml.rb +0 -14
  44. data/samples/get_page.rb +0 -11
  45. data/samples/import_xml.rb +0 -14
  46. data/samples/run_fake_media_wiki.rb +0 -8
  47. data/samples/search_content.rb +0 -12
  48. data/samples/semantic_query.rb +0 -17
  49. data/samples/upload_commons.rb +0 -45
  50. data/samples/upload_file.rb +0 -13
  51. data/spec/fake_media_wiki/api_pages.rb +0 -135
  52. data/spec/fake_media_wiki/app.rb +0 -360
  53. data/spec/fake_media_wiki/query_handling.rb +0 -136
  54. data/spec/gateway_spec.rb +0 -888
@@ -1,18 +1,23 @@
1
- require 'rubygems'
2
1
  require 'logger'
3
2
  require 'rest_client'
4
3
  require 'rexml/document'
5
4
  require 'uri'
6
- require 'active_support'
7
5
 
8
6
  module MediaWiki
9
7
 
10
8
  class Gateway
11
- attr_reader :log
9
+
10
+ USER_AGENT = "#{self}/#{VERSION}"
11
+
12
+ class << self
13
+
14
+ attr_accessor :default_user_agent
15
+
16
+ end
12
17
 
13
18
  # Set up a MediaWiki::Gateway for a given MediaWiki installation
14
19
  #
15
- # [url] Path to API of target MediaWiki (eg. "http://en.wikipedia.org/w/api.php")
20
+ # [url] Path to API of target MediaWiki (eg. 'http://en.wikipedia.org/w/api.php')
16
21
  # [options] Hash of options
17
22
  # [http_options] Hash of options for RestClient::Request (via http_send)
18
23
  #
@@ -25,912 +30,59 @@ module MediaWiki
25
30
  # [:maxlag] Maximum allowed server lag (see http://www.mediawiki.org/wiki/Manual:Maxlag_parameter), defaults to 5 seconds.
26
31
  # [:retry_count] Number of times to try before giving up if MediaWiki returns 503 Service Unavailable, defaults to 3 (original request plus two retries).
27
32
  # [:retry_delay] Seconds to wait before retry if MediaWiki returns 503 Service Unavailable, defaults to 10 seconds.
28
- def initialize(url, options={}, http_options={})
29
- default_options = {
30
- :bot => false,
31
- :limit => 500,
32
- :logdevice => STDERR,
33
- :loglevel => Logger::WARN,
34
- :maxlag => 5,
35
- :retry_count => 3,
36
- :retry_delay => 10,
37
- :max_results => 500
38
- }
39
- @options = default_options.merge(options)
40
- @http_options = http_options
41
- @wiki_url = url
33
+ # [:user_agent] User-Agent header to send with requests, defaults to ::default_user_agent or nil.
34
+ def initialize(url, options = {}, http_options = {})
35
+ @options = {
36
+ bot: false,
37
+ limit: 500,
38
+ logdevice: STDERR,
39
+ loglevel: Logger::WARN,
40
+ max_results: 500,
41
+ maxlag: 5,
42
+ retry_count: 3,
43
+ retry_delay: 10,
44
+ user_agent: self.class.default_user_agent
45
+ }.merge(options)
46
+
42
47
  @log = Logger.new(@options[:logdevice])
43
48
  @log.level = @options[:loglevel]
44
- @headers = { "User-Agent" => "MediaWiki::Gateway/#{MediaWiki::VERSION}", "Accept-Encoding" => "gzip" }
45
- @cookies = {}
46
- end
47
-
48
- attr_reader :base_url, :cookies
49
-
50
- # Login to MediaWiki
51
- #
52
- # [username] Username
53
- # [password] Password
54
- # [domain] Domain for authentication plugin logins (eg. LDAP), optional -- defaults to 'local' if not given
55
- # [options] Hash of additional options
56
- #
57
- # Throws MediaWiki::Unauthorized if login fails
58
- def login(username, password, domain = 'local', options = {})
59
- make_api_request(options.merge(
60
- 'action' => 'login',
61
- 'lgname' => username,
62
- 'lgpassword' => password,
63
- 'lgdomain' => domain
64
- ))
65
-
66
- @password = password
67
- @username = username
68
- end
69
-
70
- # Fetch MediaWiki page in MediaWiki format. Does not follow redirects.
71
- #
72
- # [page_title] Page title to fetch
73
- # [options] Hash of additional options
74
- #
75
- # Returns content of page as string, nil if the page does not exist.
76
- def get(page_title, options = {})
77
- page = make_api_request(options.merge(
78
- 'action' => 'query',
79
- 'prop' => 'revisions',
80
- 'rvprop' => 'content',
81
- 'titles' => page_title
82
- )).first.elements['query/pages/page']
83
-
84
- page.elements['revisions/rev'].text || '' if valid_page?(page)
85
- end
86
-
87
- # Fetch latest revision ID of a MediaWiki page. Does not follow redirects.
88
- #
89
- # [page_title] Page title to fetch
90
- # [options] Hash of additional options
91
- #
92
- # Returns revision ID as a string, nil if the page does not exist.
93
- def revision(page_title, options = {})
94
- page = make_api_request(options.merge(
95
- 'action' => 'query',
96
- 'prop' => 'revisions',
97
- 'rvprop' => 'ids',
98
- 'rvlimit' => 1,
99
- 'titles' => page_title
100
- )).first.elements['query/pages/page']
101
-
102
- page.elements['revisions/rev'].attributes['revid'] if valid_page?(page)
103
- end
104
-
105
- # Render a MediaWiki page as HTML
106
- #
107
- # [page_title] Page title to fetch
108
- # [options] Hash of additional options
109
- #
110
- # Options:
111
- # * [:linkbase] supply a String to prefix all internal (relative) links with. '/wiki/' is assumed to be the base of a relative link
112
- # * [:noeditsections] strips all edit-links if set to +true+
113
- # * [:noimages] strips all +img+ tags from the rendered text if set to +true+
114
- #
115
- # Returns rendered page as string, or nil if the page does not exist
116
- def render(page_title, options = {})
117
- form_data = {'action' => 'parse', 'page' => page_title}
118
49
 
119
- valid_options = %w(linkbase noeditsections noimages)
120
- # Check options
121
- options.keys.each{|opt| raise ArgumentError.new("Unknown option '#{opt}'") unless valid_options.include?(opt.to_s)}
122
-
123
- rendered = nil
124
- parsed = make_api_request(form_data).first.elements["parse"]
125
- if parsed.attributes["revid"] != '0'
126
- rendered = parsed.elements["text"].text.gsub(/<!--(.|\s)*?-->/, '')
127
- # OPTIMIZE: unifiy the keys in +options+ like symbolize_keys! but w/o
128
- if options["linkbase"] or options[:linkbase]
129
- linkbase = options["linkbase"] || options[:linkbase]
130
- rendered = rendered.gsub(/\shref="\/wiki\/([\w\(\)\-\.%:,]*)"/, ' href="' + linkbase + '/wiki/\1"')
131
- end
132
- if options["noeditsections"] or options[:noeditsections]
133
- rendered = rendered.gsub(/<span class="editsection">\[.+\]<\/span>/, '')
134
- end
135
- if options["noimages"] or options[:noimages]
136
- rendered = rendered.gsub(/<img.*\/>/, '')
137
- end
138
- end
139
- rendered
140
- end
141
-
142
- # Create a new page, or overwrite an existing one
143
- #
144
- # [title] Page title to create or overwrite, string
145
- # [content] Content for the page, string
146
- # [options] Hash of additional options
147
- #
148
- # Options:
149
- # * [:overwrite] Allow overwriting existing pages
150
- # * [:summary] Edit summary for history, string
151
- # * [:token] Use this existing edit token instead requesting a new one (useful for bulk loads)
152
- # * [:minor] Mark this edit as "minor" if true, mark this edit as "major" if false, leave major/minor status by default if not specified
153
- # * [:notminor] Mark this edit as "major" if true
154
- # * [:bot] Set the bot parameter (see http://www.mediawiki.org/wiki/API:Edit#Parameters). Defaults to false.
155
- def create(title, content, options={})
156
- form_data = {'action' => 'edit', 'title' => title, 'text' => content, 'summary' => (options[:summary] || ""), 'token' => get_token('edit', title)}
157
- if @options[:bot] or options[:bot]
158
- form_data['bot'] = '1'
159
- form_data['assert'] = 'bot'
160
- end
161
- form_data['minor'] = '1' if options[:minor]
162
- form_data['notminor'] = '1' if options[:minor] == false or options[:notminor]
163
- form_data['createonly'] = "" unless options[:overwrite]
164
- form_data['section'] = options[:section].to_s if options[:section]
165
- make_api_request(form_data)
166
- end
167
-
168
- # Edit page
169
- #
170
- # Same options as create, but always overwrites existing pages (and creates them if they don't exist already).
171
- def edit(title, content, options={})
172
- create(title, content, {:overwrite => true}.merge(options))
173
- end
174
-
175
- # Protect/unprotect a page
176
- #
177
- # Arguments:
178
- # * [title] Page title to protect, string
179
- # * [protections] Protections to apply, hash or array of hashes
180
- #
181
- # Protections:
182
- # * [:action] (required) The action to protect, string
183
- # * [:group] (required) The group allowed to perform the action, string
184
- # * [:expiry] The protection expiry as a GNU timestamp, string
185
- #
186
- # * [options] Hash of additional options
187
- #
188
- # Options:
189
- # * [:cascade] Protect pages included in this page, boolean
190
- # * [:reason] Reason for protection, string
191
- #
192
- # Examples:
193
- # 1. mw.protect('Main Page', {:action => 'edit', :group => 'all'}, {:cascade => true})
194
- # 2. prt = [{:action => 'move', :group => 'sysop', :expiry => 'never'},
195
- # {:action => 'edit', :group => 'autoconfirmed', :expiry => 'next Monday 16:04:57'}]
196
- # mw.protect('Main Page', prt, {:reason => 'awesomeness'})
197
- #
198
- def protect(title, protections, options={})
199
- # validate and format protections
200
- protections = [protections] if protections.is_a?(Hash)
201
- raise ArgumentError.new("Invalid type '#{protections.class}' for protections") unless protections.is_a?(Array)
202
- valid_prt_options = %w(action group expiry)
203
- required_prt_options = %w(action group)
204
- p,e = [],[]
205
- protections.each do |prt|
206
- existing_prt_options = []
207
- prt.keys.each do |opt|
208
- if valid_prt_options.include?(opt.to_s)
209
- existing_prt_options.push(opt.to_s)
210
- else
211
- raise ArgumentError.new("Unknown option '#{opt}' for protections")
212
- end
213
- end
214
- required_prt_options.each{|opt| raise ArgumentError.new("Missing required option '#{opt}' for protections") unless existing_prt_options.include?(opt)}
215
- p.push("#{prt[:action]}=#{prt[:group]}")
216
- if prt.has_key?(:expiry)
217
- e.push(prt[:expiry].to_s)
218
- else
219
- e.push('never')
220
- end
221
- end
222
-
223
- # validate options
224
- valid_options = %w(cascade reason)
225
- options.keys.each{|opt| raise ArgumentError.new("Unknown option '#{opt}'") unless valid_options.include?(opt.to_s)}
226
-
227
- # make API request
228
- form_data = {'action' => 'protect', 'title' => title, 'token' => get_token('protect', title)}
229
- form_data['protections'] = p.join('|')
230
- form_data['expiry'] = e.join('|')
231
- form_data['cascade'] = '' if options[:cascade] === true
232
- form_data['reason'] = options[:reason].to_s if options[:reason]
233
- make_api_request(form_data)
234
- end
235
-
236
- # Move a page to a new title
237
- #
238
- # [from] Old page name
239
- # [to] New page name
240
- # [options] Hash of additional options
241
- #
242
- # Options:
243
- # * [:movesubpages] Move associated subpages
244
- # * [:movetalk] Move associated talkpages
245
- # * [:noredirect] Do not create a redirect page from old name. Requires the 'suppressredirect' user right, otherwise MW will silently ignore the option and create the redirect anyway.
246
- # * [:reason] Reason for move
247
- # * [:watch] Add page and any redirect to watchlist
248
- # * [:unwatch] Remove page and any redirect from watchlist
249
- def move(from, to, options={})
250
- valid_options = %w(movesubpages movetalk noredirect reason watch unwatch)
251
- options.keys.each{|opt| raise ArgumentError.new("Unknown option '#{opt}'") unless valid_options.include?(opt.to_s)}
252
-
253
- form_data = options.merge({'action' => 'move', 'from' => from, 'to' => to, 'token' => get_token('move', from)})
254
- make_api_request(form_data)
255
- end
256
-
257
- # Delete one page. (MediaWiki API does not support deleting multiple pages at a time.)
258
- #
259
- # [title] Title of page to delete
260
- # [options] Hash of additional options
261
- def delete(title, options = {})
262
- make_api_request(options.merge(
263
- 'action' => 'delete',
264
- 'title' => title,
265
- 'token' => get_token('delete', title)
266
- ))
267
- end
268
-
269
- # Undelete all revisions of one page.
270
- #
271
- # [title] Title of page to undelete
272
- # [options] Hash of additional options
273
- #
274
- # Returns number of revisions undeleted, or zero if nothing to undelete
275
- def undelete(title, options = {})
276
- if token = get_undelete_token(title)
277
- make_api_request(options.merge(
278
- 'action' => 'undelete',
279
- 'title' => title,
280
- 'token' => token
281
- )).first.elements['undelete'].attributes['revisions'].to_i
282
- else
283
- 0 # No revisions to undelete
284
- end
285
- end
286
-
287
- # Get a list of matching page titles in a namespace
288
- #
289
- # [key] Search key, matched as a prefix (^key.*). May contain or equal a namespace, defaults to main (namespace 0) if none given.
290
- # [options] Optional hash of additional options, eg. { 'apfilterredir' => 'nonredirects' }. See http://www.mediawiki.org/wiki/API:Allpages
291
- #
292
- # Returns array of page titles (empty if no matches)
293
- def list(key, options = {})
294
- key, namespace = key.split(':', 2).reverse
295
- namespace = namespaces_by_prefix[namespace] || 0
296
-
297
- iterate_query('allpages', '//p', 'title', 'apfrom', options.merge(
298
- 'list' => 'allpages',
299
- 'apprefix' => key,
300
- 'apnamespace' => namespace,
301
- 'aplimit' => @options[:limit]
302
- ))
303
- end
304
-
305
- # Get a list of pages that are members of a category
306
- #
307
- # [category] Name of the category
308
- # [options] Optional hash of additional options. See http://www.mediawiki.org/wiki/API:Categorymembers
309
- #
310
- # Returns array of page titles (empty if no matches)
311
- def category_members(category, options = {})
312
- iterate_query('categorymembers', '//cm', 'title', 'cmcontinue', options.merge(
313
- 'cmtitle' => category,
314
- 'cmlimit' => @options[:limit]
315
- ))
316
- end
317
-
318
- # Get a list of pages that link to a target page
319
- #
320
- # [title] Link target page
321
- # [filter] "all" links (default), "redirects" only, or "nonredirects" (plain links only)
322
- # [options] Hash of additional options
323
- #
324
- # Returns array of page titles (empty if no matches)
325
- def backlinks(title, filter = 'all', options = {})
326
- iterate_query('backlinks', '//bl', 'title', 'blcontinue', options.merge(
327
- 'bltitle' => title,
328
- 'blfilterredir' => filter,
329
- 'bllimit' => @options[:limit]
330
- ))
331
- end
332
-
333
- # Get a list of pages with matching content in given namespaces
334
- #
335
- # [key] Search key
336
- # [namespaces] Array of namespace names to search (defaults to main only)
337
- # [limit] Maximum number of hits to ask for (defaults to 500; note that Wikimedia Foundation wikis allow only 50 for normal users)
338
- # [max_results] Maximum total number of results to return
339
- # [options] Hash of additional options
340
- #
341
- # Returns array of page titles (empty if no matches)
342
- def search(key, namespaces = nil, limit = @options[:limit], max_results = @options[:max_results], options = {})
343
- titles = []
344
- offset = 0
345
-
346
- form_data = options.merge(
347
- 'action' => 'query',
348
- 'list' => 'search',
349
- 'srwhat' => 'text',
350
- 'srsearch' => key,
351
- 'srlimit' => limit
352
- )
353
-
354
- if namespaces
355
- namespaces = [ namespaces ] unless namespaces.kind_of? Array
356
- form_data['srnamespace'] = namespaces.map! do |ns| namespaces_by_prefix[ns] end.join('|')
357
- end
358
-
359
- begin
360
- form_data['sroffset'] = offset if offset
361
- form_data['srlimit'] = [limit, max_results - offset.to_i].min
362
- res, offset = make_api_request(form_data, '//query-continue/search/@sroffset')
363
- titles += REXML::XPath.match(res, "//p").map { |x| x.attributes["title"] }
364
- end while offset && offset.to_i < max_results.to_i
365
-
366
- titles
367
- end
368
-
369
- # Get a list of users
370
- #
371
- # [options] Optional hash of options, eg. { 'augroup' => 'sysop' }. See http://www.mediawiki.org/wiki/API:Allusers
372
- #
373
- # Returns array of user names (empty if no matches)
374
- def users(options = {})
375
- iterate_query('allusers', '//u', 'name', 'aufrom', options.merge(
376
- 'aulimit' => @options[:limit]
377
- ))
378
- end
379
-
380
- # Get user contributions
381
- #
382
- # user: The user name
383
- # count: Maximum number of contributions to retreive, or nil for all
384
- # [options] Optional hash of options, eg. { 'ucnamespace' => 4 }. See http://www.mediawiki.org/wiki/API:Usercontribs
385
- #
386
- # Returns array of hashes containing the "item" attributes defined here: http://www.mediawiki.org/wiki/API:Usercontribs
387
- def contributions(user, count = nil, options = {})
388
- result = []
389
-
390
- iterate_query('usercontribs', '//item', nil, 'uccontinue', options.merge(
391
- 'ucuser' => user,
392
- 'uclimit' => @options[:limit]
393
- )) { |element|
394
- result << hash = {}
395
- element.attributes.each { |key, value| hash[key] = value }
50
+ @http_options, @wiki_url, @cookies, @headers = http_options, url, {}, {
51
+ 'User-Agent' => [@options[:user_agent], USER_AGENT].compact.join(' '),
52
+ 'Accept-Encoding' => 'gzip'
396
53
  }
397
-
398
- count ? result.take(count) : result
399
54
  end
400
55
 
401
- # Upload a file, or get the status of pending uploads. Several
402
- # methods are available:
403
- #
404
- # * Upload file contents directly.
405
- # * Have the MediaWiki server fetch a file from a URL, using the
406
- # "url" parameter
407
- #
408
- # Requires Mediawiki 1.16+
409
- #
410
- # Arguments:
411
- # * [path] Path to file to upload. Set to nil if uploading from URL.
412
- # * [options] Hash of additional options
413
- #
414
- # Note that queries using session keys must be done in the same login
415
- # session as the query that originally returned the key (i.e. do not
416
- # log out and then log back in).
417
- #
418
- # Options:
419
- # * 'filename' - Target filename (defaults to local name if not given), options[:target] is alias for this.
420
- # * 'comment' - Upload comment. Also used as the initial page text for new files if "text" is not specified.
421
- # * 'text' - Initial page text for new files
422
- # * 'watch' - Watch the page
423
- # * 'ignorewarnings' - Ignore any warnings
424
- # * 'url' - Url to fetch the file from. Set path to nil if you want to use this.
425
- #
426
- # Deprecated but still supported options:
427
- # * :description - Description of this file. Used as 'text'.
428
- # * :target - Target filename, same as 'filename'.
429
- # * :summary - Edit summary for history. Used as 'comment'. Also used as 'text' if neither it or :description is specified.
430
- #
431
- # Examples:
432
- # mw.upload('/path/to/local/file.jpg', 'filename' => "RemoteFile.jpg")
433
- # mw.upload(nil, 'filename' => "RemoteFile2.jpg", 'url' => 'http://remote.com/server/file.jpg')
434
- #
435
- def upload(path, options={})
436
- if options[:description]
437
- options['text'] = options[:description]
438
- options.delete(:description)
439
- end
440
-
441
- if options[:target]
442
- options['filename'] = options[:target]
443
- options.delete(:target)
444
- end
445
-
446
- if options[:summary]
447
- options['text'] ||= options[:summary]
448
- options['comment'] = options[:summary]
449
- options.delete(:summary)
450
- end
451
-
452
- options['comment'] ||= "Uploaded by MediaWiki::Gateway"
453
- options['file'] = File.new(path) if path
454
- full_name = path || options['url']
455
- options['filename'] ||= File.basename(full_name) if full_name
456
-
457
- raise ArgumentError.new(
458
- "One of the 'file', 'url' or 'sessionkey' options must be specified!"
459
- ) unless options['file'] || options['url'] || options['sessionkey']
460
-
461
- form_data = options.merge(
462
- 'action' => 'upload',
463
- 'token' => get_token('edit', options['filename'])
464
- )
465
-
466
- make_api_request(form_data)
467
- end
468
-
469
- # Checks if page is a redirect.
470
- #
471
- # [page_title] Page title to fetch
472
- #
473
- # Returns true if the page is a redirect, false if it is not or the page does not exist.
474
- def redirect?(page_title)
475
- form_data = {'action' => 'query', 'prop' => 'info', 'titles' => page_title}
476
- page = make_api_request(form_data).first.elements["query/pages/page"]
477
- !!(valid_page?(page) and page.attributes["redirect"])
478
- end
479
-
480
- # Get image list for given article[s]. Follows redirects.
481
- #
482
- # _article_or_pageid_ is the title or pageid of a single article
483
- # _imlimit_ is the maximum number of images to return (defaults to 200)
484
- # _options_ is the hash of additional options
485
- #
486
- # Example:
487
- # images = mw.images('Gaborone')
488
- # _images_ would contain ['File:Gaborone at night.jpg', 'File:Gaborone2.png', ...]
489
- def images(article_or_pageid, imlimit = 200, options = {})
490
- form_data = options.merge(
491
- 'action' => 'query',
492
- 'prop' => 'images',
493
- 'imlimit' => imlimit,
494
- 'redirects' => true
495
- )
496
-
497
- case article_or_pageid
498
- when Fixnum
499
- form_data['pageids'] = article_or_pageid
500
- else
501
- form_data['titles'] = article_or_pageid
502
- end
503
- xml, _ = make_api_request(form_data)
504
- page = xml.elements["query/pages/page"]
505
- if valid_page? page
506
- if xml.elements["query/redirects/r"]
507
- # We're dealing with redirect here.
508
- images(page.attributes["pageid"].to_i, imlimit)
509
- else
510
- REXML::XPath.match(page, "images/im").map { |x| x.attributes["title"] }
511
- end
512
- else
513
- nil
514
- end
515
- end
516
-
517
- # Get list of interlanguage links for given article[s]. Follows redirects. Returns a hash like { 'id' => 'Yerusalem', 'en' => 'Jerusalem', ... }
518
- #
519
- # _article_or_pageid_ is the title or pageid of a single article
520
- # _lllimit_ is the maximum number of langlinks to return (defaults to 500, the maximum)
521
- # _options_ is the hash of additional options
522
- #
523
- # Example:
524
- # langlinks = mw.langlinks('Jerusalem')
525
- def langlinks(article_or_pageid, lllimit = 500, options = {})
526
- form_data = options.merge(
527
- 'action' => 'query',
528
- 'prop' => 'langlinks',
529
- 'lllimit' => lllimit,
530
- 'redirects' => true
531
- )
56
+ attr_reader :log, :wiki_url, :cookies, :headers
532
57
 
533
- case article_or_pageid
534
- when Fixnum
535
- form_data['pageids'] = article_or_pageid
536
- else
537
- form_data['titles'] = article_or_pageid
538
- end
539
- xml, _ = make_api_request(form_data)
540
- page = xml.elements["query/pages/page"]
541
- if valid_page? page
542
- if xml.elements["query/redirects/r"]
543
- # We're dealing with the redirect here.
544
- langlinks(page.attributes["pageid"].to_i, lllimit)
545
- else
546
- langl = REXML::XPath.match(page, 'langlinks/ll')
547
- if langl.nil?
548
- nil
549
- else
550
- links = {}
551
- langl.each{ |ll| links[ll.attributes["lang"]] = ll.children[0].to_s }
552
- return links
553
- end
554
- end
555
- else
556
- nil
557
- end
558
- end
559
-
560
- # Convenience wrapper for _langlinks_ returning the title in language _lang_ (ISO code) for a given article of pageid, if it exists, via the interlanguage link
561
- #
562
- # Example:
563
- #
564
- # langlink = mw.langlink_for_lang('Tycho Brahe', 'de')
565
- def langlink_for_lang(article_or_pageid, lang)
566
- return langlinks(article_or_pageid)[lang]
567
- end
568
-
569
- # Requests image info from MediaWiki. Follows redirects.
570
- #
571
- # _file_name_or_page_id_ should be either:
572
- # * a file name (String) you want info about without File: prefix.
573
- # * or a Fixnum page id you of the file.
574
- #
575
- # _options_ is +Hash+ passed as query arguments. See
576
- # http://www.mediawiki.org/wiki/API:Query_-_Properties#imageinfo_.2F_ii
577
- # for more information.
578
- #
579
- # options['iiprop'] should be either a string of properties joined by
580
- # '|' or an +Array+ (or more precisely something that responds to #join).
581
- #
582
- # +Hash+ like object is returned where keys are image properties.
583
- #
584
- # Example:
585
- # mw.image_info(
586
- # "Trooper.jpg", 'iiprop' => ['timestamp', 'user']
587
- # ).each do |key, value|
588
- # puts "#{key.inspect} => #{value.inspect}"
589
- # end
590
- #
591
- # Output:
592
- # "timestamp" => "2009-10-31T12:59:11Z"
593
- # "user" => "Valdas"
594
- #
595
- def image_info(file_name_or_page_id, options={})
596
- options['iiprop'] = options['iiprop'].join('|') \
597
- if options['iiprop'].respond_to?(:join)
598
- form_data = options.merge(
599
- 'action' => 'query',
600
- 'prop' => 'imageinfo',
601
- 'redirects' => true
602
- )
603
-
604
- case file_name_or_page_id
605
- when Fixnum
606
- form_data['pageids'] = file_name_or_page_id
607
- else
608
- form_data['titles'] = "File:#{file_name_or_page_id}"
609
- end
610
-
611
- xml, _ = make_api_request(form_data)
612
- page = xml.elements["query/pages/page"]
613
- if valid_page? page
614
- if xml.elements["query/redirects/r"]
615
- # We're dealing with redirect here.
616
- image_info(page.attributes["pageid"].to_i, options)
617
- else
618
- page.elements["imageinfo/ii"].attributes
619
- end
620
- else
621
- nil
622
- end
623
- end
624
-
625
- # Download _file_name_ (without "File:" or "Image:" prefix). Returns file contents. All options are passed to
626
- # #image_info however options['iiprop'] is forced to url. You can still
627
- # set other options to control what file you want to download.
628
- def download(file_name, options={})
629
- options['iiprop'] = 'url'
630
-
631
- attributes = image_info(file_name, options)
632
- if attributes
633
- RestClient.get attributes['url']
634
- else
635
- nil
636
- end
637
- end
638
-
639
- # Imports a MediaWiki XML dump
640
- #
641
- # [xml] String or array of page names to fetch
642
- # [options] Hash of additional options
643
- #
644
- # Returns XML array <api><import><page/><page/>...
645
- # <page revisions="1"> (or more) means successfully imported
646
- # <page revisions="0"> means duplicate, not imported
647
- def import(xmlfile, options = {})
648
- make_api_request(options.merge(
649
- 'action' => 'import',
650
- 'xml' => File.new(xmlfile),
651
- 'token' => get_token('import', 'Main Page'), # NB: dummy page name
652
- 'format' => 'xml'
653
- ))
654
- end
655
-
656
- # Exports a page or set of pages
657
- #
658
- # [page_titles] String or array of page titles to fetch
659
- # [options] Hash of additional options
660
- #
661
- # Returns MediaWiki XML dump
662
- def export(page_titles, options = {})
663
- make_api_request(options.merge(
664
- 'action' => 'query',
665
- 'titles' => Array(page_titles).join('|'),
666
- 'export' => nil,
667
- 'exportnowrap' => nil
668
- )).first
669
- end
670
-
671
- # Get the wiki's siteinfo as a hash. See http://www.mediawiki.org/wiki/API:Siteinfo.
672
- #
673
- # [options] Hash of additional options
674
- def siteinfo(options = {})
675
- res = make_api_request(options.merge(
676
- 'action' => 'query',
677
- 'meta' => 'siteinfo'
678
- )).first
679
-
680
- REXML::XPath.first(res, '//query/general')
681
- .attributes.each_with_object({}) { |(k, v), h| h[k] = v }
682
- end
683
-
684
- # Get the wiki's MediaWiki version.
685
- #
686
- # [options] Hash of additional options passed to #siteinfo
687
- def version(options = {})
688
- siteinfo(options).fetch('generator', '').split.last
689
- end
690
-
691
- # Get a list of all known namespaces
692
- #
693
- # [options] Hash of additional options
694
- #
695
- # Returns array of namespaces (name => id)
696
- def namespaces_by_prefix(options = {})
697
- res = make_api_request(options.merge(
698
- 'action' => 'query',
699
- 'meta' => 'siteinfo',
700
- 'siprop' => 'namespaces'
701
- )).first
702
-
703
- REXML::XPath.match(res, "//ns").inject(Hash.new) do |namespaces, namespace|
704
- prefix = namespace.attributes["canonical"] || ""
705
- namespaces[prefix] = namespace.attributes["id"].to_i
706
- namespaces
707
- end
708
- end
709
-
710
- # Get a list of all installed (and registered) extensions
711
- #
712
- # [options] Hash of additional options
713
- #
714
- # Returns array of extensions (name => version)
715
- def extensions(options = {})
716
- res = make_api_request(options.merge(
717
- 'action' => 'query',
718
- 'meta' => 'siteinfo',
719
- 'siprop' => 'extensions'
720
- )).first
721
-
722
- REXML::XPath.match(res, "//ext").inject(Hash.new) do |extensions, extension|
723
- name = extension.attributes["name"] || ""
724
- extensions[name] = extension.attributes["version"]
725
- extensions
726
- end
727
- end
728
-
729
- # Sends e-mail to a user
730
- #
731
- # [user] Username to send mail to (name only: eg. 'Bob', not 'User:Bob')
732
- # [subject] Subject of message
733
- # [content] Content of message
734
- # [options] Hash of additional options
735
- #
736
- # Will raise a 'noemail' APIError if the target user does not have a confirmed email address, see http://www.mediawiki.org/wiki/API:E-mail for details.
737
- def email_user(user, subject, text, options = {})
738
- res = make_api_request(options.merge(
739
- 'action' => 'emailuser',
740
- 'target' => user,
741
- 'subject' => subject,
742
- 'text' => text,
743
- 'token' => get_token('email', "User:#{user}")
744
- )).first
745
-
746
- res.elements['emailuser'].attributes['result'] == 'Success'
747
- end
748
-
749
- # Execute Semantic Mediawiki query
750
- #
751
- # [query] Semantic Mediawiki query
752
- # [params] Array of additional parameters or options, eg. mainlabel=Foo or ?Place (optional)
753
- # [options] Hash of additional options
754
- #
755
- # Returns result as an HTML string
756
- def semantic_query(query, params = [], options = {})
757
- unless smw_version = extensions['Semantic MediaWiki']
758
- raise MediaWiki::Exception, 'Semantic MediaWiki extension not installed.'
759
- end
760
-
761
- if smw_version.to_f >= 1.7
762
- make_api_request(options.merge(
763
- 'action' => 'ask',
764
- 'query' => "#{query}|#{params.join('|')}"
765
- )).first
766
- else
767
- make_api_request(options.merge(
768
- 'action' => 'parse',
769
- 'prop' => 'text',
770
- 'text' => "{{#ask:#{query}|#{params.push('format=list').join('|')}}}"
771
- )).first.elements['parse/text'].text
772
- end
773
- end
774
-
775
- # Create a new account
776
- #
777
- # [options] is +Hash+ passed as query arguments. See https://www.mediawiki.org/wiki/API:Account_creation#Parameters for more information.
778
- def create_account(options)
779
- make_api_request(options.merge('action' => 'createaccount')).first
780
- end
781
-
782
- # Sets options for currenlty logged in user
783
- #
784
- # [changes] a +Hash+ that will be transformed into an equal sign and pipe-separated key value parameter
785
- # [optionname] a +String+ indicating which option to change (optional)
786
- # [optionvalue] the new value for optionname - allows pipe characters (optional)
787
- # [reset] a +Boolean+ indicating if all preferences should be reset to site defaults (optional)
788
- # [options] Hash of additional options
789
- def options(changes = {}, optionname = nil, optionvalue = nil, reset = false, options = {})
790
- form_data = options.merge(
791
- 'action' => 'options',
792
- 'token' => get_options_token
793
- )
794
-
795
- if changes.present?
796
- form_data['change'] = changes.map { |key, value| "#{key}=#{value}" }.join('|')
797
- end
798
-
799
- if optionname.present?
800
- form_data[optionname] = optionvalue
801
- end
802
-
803
- if reset
804
- form_data['reset'] = true
805
- end
806
-
807
- make_api_request(form_data).first
808
- end
809
-
810
- # Set groups for a user
58
+ # Make generic request to API
811
59
  #
812
- # [user] Username of user to modify
813
- # [groups_to_add] Groups to add user to, as an array or a string if a single group (optional)
814
- # [groups_to_remove] Groups to remove user from, as an array or a string if a single group (optional)
815
- # [options] Hash of additional options
816
- def set_groups(user, groups_to_add = [], groups_to_remove = [], comment = '', options = {})
817
- token = get_userrights_token(user)
818
- userrights(user, token, groups_to_add, groups_to_remove, comment, options)
819
- end
820
-
821
- # Review current revision of an article (requires FlaggedRevisions extension, see http://www.mediawiki.org/wiki/Extension:FlaggedRevs)
60
+ # [form_data] hash of attributes to post
61
+ # [continue_xpath] XPath selector for query continue parameter
822
62
  #
823
- # [title] Title of article to review
824
- # [flags] Hash of flags and values to set, eg. { "accuracy" => "1", "depth" => "2" }
825
- # [comment] Comment to add to review (optional)
826
- # [options] Hash of additional options
827
- def review(title, flags, comment = "Reviewed by MediaWiki::Gateway", options = {})
828
- raise APIError.new('missingtitle', "Article #{title} not found") unless revid = revision(title)
829
-
830
- form_data = options.merge(
831
- 'action' => 'review',
832
- 'revid' => revid,
833
- 'token' => get_token('edit', title),
834
- 'comment' => comment
835
- )
836
-
837
- flags.each { |k, v| form_data["flag_#{k}"] = v }
838
-
839
- make_api_request(form_data).first
63
+ # Returns XML document
64
+ def send_request(form_data, continue_xpath = nil)
65
+ make_api_request(form_data, continue_xpath).first
840
66
  end
841
67
 
842
68
  private
843
69
 
844
70
  # Fetch token (type 'delete', 'edit', 'email', 'import', 'move', 'protect')
845
71
  def get_token(type, page_titles)
846
- form_data = {'action' => 'query', 'prop' => 'info', 'intoken' => type, 'titles' => page_titles}
847
- res, _ = make_api_request(form_data)
848
- token = res.elements["query/pages/page"].attributes[type + "token"]
849
- raise Unauthorized.new "User is not permitted to perform this operation: #{type}" if token.nil?
850
- token
851
- end
72
+ res = send_request(
73
+ 'action' => 'query',
74
+ 'prop' => 'info',
75
+ 'intoken' => type,
76
+ 'titles' => page_titles
77
+ )
852
78
 
853
- def get_undelete_token(page_titles)
854
- form_data = {'action' => 'query', 'list' => 'deletedrevs', 'prop' => 'info', 'drprop' => 'token', 'titles' => page_titles}
855
- res, _ = make_api_request(form_data)
856
- if res.elements["query/deletedrevs/page"]
857
- token = res.elements["query/deletedrevs/page"].attributes["token"]
858
- raise Unauthorized.new "User is not permitted to perform this operation: #{type}" if token.nil?
859
- token
860
- else
861
- nil
862
- end
863
- end
864
-
865
- # User rights management (aka group assignment)
866
- def get_userrights_token(user)
867
- form_data = {'action' => 'query', 'list' => 'users', 'ustoken' => 'userrights', 'ususers' => user}
868
- res, _ = make_api_request(form_data)
869
- token = res.elements["query/users/user"].attributes["userrightstoken"]
870
-
871
- @log.debug("RESPONSE: #{res.to_s}")
872
- if token.nil?
873
- if res.elements["query/users/user"].attributes["missing"]
874
- raise APIError.new('invaliduser', "User '#{user}' was not found (get_userrights_token)")
875
- else
876
- raise Unauthorized.new "User '#{@username}' is not permitted to perform this operation: get_userrights_token"
877
- end
79
+ unless token = res.elements['query/pages/page'].attributes[type + 'token']
80
+ raise Unauthorized.new "User is not permitted to perform this operation: #{type}"
878
81
  end
879
82
 
880
83
  token
881
84
  end
882
85
 
883
- def get_options_token
884
- form_data = { 'action' => 'tokens', 'type' => 'options' }
885
- res, _ = make_api_request(form_data)
886
- res.elements['tokens'].attributes['optionstoken']
887
- end
888
-
889
- def userrights(user, token, groups_to_add, groups_to_remove, reason, options = {})
890
- # groups_to_add and groups_to_remove can be a string or an array. Turn them into MediaWiki's pipe-delimited list format.
891
- if groups_to_add.is_a? Array
892
- groups_to_add = groups_to_add.join('|')
893
- end
894
-
895
- if groups_to_remove.is_a? Array
896
- groups_to_remove = groups_to_remove.join('|')
897
- end
898
-
899
- make_api_request(options.merge(
900
- 'action' => 'userrights',
901
- 'user' => user,
902
- 'token' => token,
903
- 'add' => groups_to_add,
904
- 'remove' => groups_to_remove,
905
- 'reason' => reason
906
- )).first
907
- end
908
-
909
-
910
- # Make a custom query
911
- #
912
- # [options] query options
913
- #
914
- # Returns the REXML::Element object as result
915
- #
916
- # Example:
917
- # def creation_time(pagename)
918
- # res = bot.custom_query(:prop => :revisions,
919
- # :titles => pagename,
920
- # :rvprop => :timestamp,
921
- # :rvdir => :newer,
922
- # :rvlimit => 1)
923
- # timestr = res.get_elements('*/*/*/rev')[0].attribute('timestamp').to_s
924
- # time.parse(timestr)
925
- # end
926
- #
927
- def custom_query(options)
928
- form_data = {}
929
- options.each {|k,v| form_data[k.to_s] = v.to_s }
930
- form_data['action'] = 'query'
931
- make_api_request(form_data).first.elements['query']
932
- end
933
-
934
86
  # Iterate over query results
935
87
  #
936
88
  # [list] list name to query
@@ -967,37 +119,39 @@ module MediaWiki
967
119
 
968
120
  # Make generic request to API
969
121
  #
970
- # [form_data] hash or string of attributes to post
122
+ # [form_data] hash of attributes to post
971
123
  # [continue_xpath] XPath selector for query continue parameter
972
124
  # [retry_count] Counter for retries
973
125
  #
974
- # Returns XML document
975
- def make_api_request(form_data, continue_xpath=nil, retry_count=1)
976
- if form_data.kind_of? Hash
977
- form_data['format'] = 'xml'
978
- form_data['maxlag'] = @options[:maxlag]
979
- end
980
- http_send(@wiki_url, form_data, @headers.merge({:cookies => @cookies})) do |response, &block|
981
- if response.code == 503 and retry_count < @options[:retry_count]
126
+ # Returns array of XML document and query continue parameter.
127
+ def make_api_request(form_data, continue_xpath = nil, retry_count = 1)
128
+ form_data.update('format' => 'xml', 'maxlag' => @options[:maxlag])
129
+
130
+ http_send(@wiki_url, form_data, @headers.merge(cookies: @cookies)) { |response, &block|
131
+ if response.code == 503 && retry_count < @options[:retry_count]
982
132
  log.warn("503 Service Unavailable: #{response.body}. Retry in #{@options[:retry_delay]} seconds.")
983
- sleep @options[:retry_delay]
133
+ sleep(@options[:retry_delay])
984
134
  make_api_request(form_data, continue_xpath, retry_count + 1)
985
135
  end
136
+
986
137
  # Check response for errors and return XML
987
- raise MediaWiki::Exception.new "Bad response: #{response}" unless response.code >= 200 and response.code < 300
138
+ unless response.code >= 200 && response.code < 300
139
+ raise MediaWiki::Exception.new("Bad response: #{response}")
140
+ end
141
+
988
142
  doc = get_response(response.dup)
989
- action = form_data['action']
990
143
 
991
144
  # login and createaccount actions require a second request with a token received on the first request
992
- if %w(login createaccount).include?(action)
145
+ if %w[login createaccount].include?(action = form_data['action'])
993
146
  action_result = doc.elements[action].attributes['result']
994
- @cookies.merge!(response.cookies)
147
+ @cookies.update(response.cookies)
995
148
 
996
149
  case action_result.downcase
997
- when "success" then
150
+ when 'success'
998
151
  return [doc, false]
999
- when "needtoken"
1000
- token = doc.elements[action].attributes["token"]
152
+ when 'needtoken'
153
+ token = doc.elements[action].attributes['token']
154
+
1001
155
  if action == 'login'
1002
156
  return make_api_request(form_data.merge('lgtoken' => token))
1003
157
  elsif action == 'createaccount'
@@ -1011,23 +165,21 @@ module MediaWiki
1011
165
  end
1012
166
  end
1013
167
  end
1014
- continue = (continue_xpath and doc.elements['query-continue']) ? REXML::XPath.first(doc, continue_xpath) : nil
1015
- return [doc, continue]
1016
- end
168
+
169
+ return [doc, (continue_xpath && doc.elements['query-continue']) ?
170
+ REXML::XPath.first(doc, continue_xpath) : nil]
171
+ }
1017
172
  end
1018
173
 
1019
174
  # Execute the HTTP request using either GET or POST as appropriate
1020
175
  def http_send url, form_data, headers, &block
1021
- opts = @http_options.merge(:url => url, :headers => headers)
176
+ opts = @http_options.merge(url: url, headers: headers)
177
+ opts[:method] = form_data['action'] == 'query' ? :get : :post
178
+ opts[:method] == :get ? headers[:params] = form_data : opts[:payload] = form_data
1022
179
 
1023
- if form_data['action'] == 'query'
1024
- log.debug("GET: #{form_data.inspect}, #{@cookies.inspect}")
1025
- headers[:params] = form_data
1026
- RestClient::Request.execute(opts.update(:method => :get), &block)
1027
- else
1028
- log.debug("POST: #{form_data.inspect}, #{@cookies.inspect}")
1029
- RestClient::Request.execute(opts.update(:method => :post, :payload => form_data), &block)
1030
- end
180
+ log.debug("#{opts[:method].upcase}: #{form_data.inspect}, #{@cookies.inspect}")
181
+
182
+ RestClient::Request.execute(opts, &block)
1031
183
  end
1032
184
 
1033
185
  # Get API XML response
@@ -1035,41 +187,54 @@ module MediaWiki
1035
187
  # Otherwise return XML root
1036
188
  def get_response(res)
1037
189
  begin
1038
- res = res.force_encoding("UTF-8") if res.respond_to?(:force_encoding)
190
+ res = res.force_encoding('UTF-8') if res.respond_to?(:force_encoding)
1039
191
  doc = REXML::Document.new(res).root
1040
192
  rescue REXML::ParseException
1041
- raise MediaWiki::Exception.new "Response is not XML. Are you sure you are pointing to api.php?"
193
+ raise MediaWiki::Exception.new('Response is not XML. Are you sure you are pointing to api.php?')
1042
194
  end
195
+
1043
196
  log.debug("RES: #{doc}")
1044
- raise MediaWiki::Exception.new "Response does not contain Mediawiki API XML: #{res}" unless [ "api", "mediawiki" ].include? doc.name
1045
- if doc.elements["error"]
1046
- code = doc.elements["error"].attributes["code"]
1047
- info = doc.elements["error"].attributes["info"]
1048
- raise APIError.new(code, info)
197
+
198
+ unless %w[api mediawiki].include?(doc.name)
199
+ raise MediaWiki::Exception.new("Response does not contain Mediawiki API XML: #{res}")
1049
200
  end
1050
- if doc.elements["warnings"]
1051
- warning("API warning: #{doc.elements["warnings"].children.map {|e| e.text}.join(", ")}")
201
+
202
+ if error = doc.elements['error']
203
+ raise APIError.new(*error.attributes.values_at(*%w[code info]))
1052
204
  end
205
+
206
+ if warnings = doc.elements['warnings']
207
+ warning("API warning: #{warnings.children.map(&:text).join(', ')}")
208
+ end
209
+
1053
210
  doc
1054
211
  end
1055
212
 
213
+ def validate_options(options, valid_options)
214
+ options.each_key { |opt|
215
+ unless valid_options.include?(opt.to_s)
216
+ raise ArgumentError, "Unknown option '#{opt}'", caller(1)
217
+ end
218
+ }
219
+ end
220
+
1056
221
  def valid_page?(page)
1057
- return false unless page
1058
- return false if page.attributes["missing"]
1059
- if page.attributes["invalid"]
1060
- warning("Invalid title '#{page.attributes["title"]}'")
1061
- else
1062
- true
1063
- end
222
+ page && !page.attributes['missing'] && (!page.attributes['invalid'] ||
223
+ warning("Invalid title '#{page.attributes['title']}'"))
1064
224
  end
1065
225
 
1066
226
  def warning(msg)
1067
- if @options[:ignorewarnings]
1068
- log.warn(msg)
1069
- return false
1070
- else
1071
- raise APIError.new('warning', msg)
1072
- end
227
+ raise APIError.new('warning', msg) unless @options[:ignorewarnings]
228
+ log.warn(msg)
229
+ false
1073
230
  end
231
+
1074
232
  end
233
+
1075
234
  end
235
+
236
+ require_relative 'gateway/files'
237
+ require_relative 'gateway/pages'
238
+ require_relative 'gateway/query'
239
+ require_relative 'gateway/site'
240
+ require_relative 'gateway/users'