mediawiki-gateway 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +1 -0
- data/config/hosts.yml +8 -8
- data/lib/media_wiki/gateway.rb +33 -22
- data/lib/media_wiki.rb +1 -1
- data/mediawiki-gateway.gemspec +3 -2
- data/script/create_page.rb +1 -1
- data/script/search_content.rb +12 -0
- data/script/upload_file.rb +1 -1
- metadata +5 -4
data/README
CHANGED
@@ -4,6 +4,7 @@ A Ruby framework for MediaWiki API manipulation. Features out of the box:
|
|
4
4
|
|
5
5
|
* Simple, elegant syntax for common operations
|
6
6
|
* Handles login, edit, move etc tokens for you
|
7
|
+
* List, search operations work around API limits to fetch all results
|
7
8
|
* Support for maxlag detection and automated retries on 503
|
8
9
|
* Integrated logging
|
9
10
|
* Tested up to MediaWiki 1.16
|
data/config/hosts.yml
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
---
|
2
2
|
en-wp:
|
3
3
|
url: http://en.wikipedia.org/w/api.php
|
4
|
-
pw:
|
5
|
-
user:
|
4
|
+
pw: password
|
5
|
+
user: username
|
6
6
|
commons:
|
7
7
|
url: http://commons.wikimedia.org/w/api.php
|
8
|
-
pw:
|
9
|
-
user:
|
8
|
+
pw: password
|
9
|
+
user: username
|
10
10
|
en-wt:
|
11
11
|
url: http://wikitravel.org/wiki/en/api.php
|
12
|
-
pw:
|
13
|
-
user:
|
12
|
+
pw: password
|
13
|
+
user: username
|
14
14
|
local:
|
15
15
|
url: http://localhost/w/api.php
|
16
|
-
pw:
|
17
|
-
user:
|
16
|
+
pw: password
|
17
|
+
user: username
|
data/lib/media_wiki/gateway.rb
CHANGED
@@ -59,7 +59,7 @@ module MediaWiki
|
|
59
59
|
# Returns content of page as string, nil if the page does not exist
|
60
60
|
def get(page_title)
|
61
61
|
form_data = {'action' => 'query', 'prop' => 'revisions', 'rvprop' => 'content', 'titles' => page_title}
|
62
|
-
page = make_api_request(form_data).elements["query/pages/page"]
|
62
|
+
page = make_api_request(form_data).first.elements["query/pages/page"]
|
63
63
|
if ! page or page.attributes["missing"]
|
64
64
|
nil
|
65
65
|
else
|
@@ -86,7 +86,7 @@ module MediaWiki
|
|
86
86
|
options.keys.each{|opt| raise ArgumentError.new("Unknown option '#{opt}'") unless valid_options.include?(opt.to_s)}
|
87
87
|
|
88
88
|
rendered = nil
|
89
|
-
parsed = make_api_request(form_data).elements["parse"]
|
89
|
+
parsed = make_api_request(form_data).first.elements["parse"]
|
90
90
|
if parsed.attributes["revid"] != '0'
|
91
91
|
rendered = parsed.elements["text"].text.gsub(/<!--(.|\s)*?-->/, '')
|
92
92
|
# OPTIMIZE: unifiy the keys in +options+ like symbolize_keys! but w/o
|
@@ -158,8 +158,7 @@ module MediaWiki
|
|
158
158
|
token = get_undelete_token(title)
|
159
159
|
if token
|
160
160
|
form_data = {'action' => 'undelete', 'title' => title, 'token' => token }
|
161
|
-
|
162
|
-
xml.elements["undelete"].attributes["revisions"].to_i
|
161
|
+
make_api_request(form_data).first.elements["undelete"].attributes["revisions"].to_i
|
163
162
|
else
|
164
163
|
0 # No revisions to undelete
|
165
164
|
end
|
@@ -183,8 +182,7 @@ module MediaWiki
|
|
183
182
|
'apprefix' => key,
|
184
183
|
'aplimit' => @options[:limit],
|
185
184
|
'apnamespace' => namespace}
|
186
|
-
res = make_api_request(form_data)
|
187
|
-
apfrom = res.elements['query-continue'] ? res.elements['query-continue/allpages'].attributes['apfrom'] : nil
|
185
|
+
res, apfrom = make_api_request(form_data, '//query-continue/allpages/@apfrom')
|
188
186
|
titles += REXML::XPath.match(res, "//p").map { |x| x.attributes["title"] }
|
189
187
|
end while apfrom
|
190
188
|
titles
|
@@ -207,8 +205,7 @@ module MediaWiki
|
|
207
205
|
'blfilterredir' => filter,
|
208
206
|
'bllimit' => @options[:limit] }
|
209
207
|
form_data['blcontinue'] = blcontinue if blcontinue
|
210
|
-
res = make_api_request(form_data)
|
211
|
-
blcontinue = res.elements['query-continue'] ? res.elements['query-continue/backlinks'].attributes['blcontinue'] : nil
|
208
|
+
res, blcontinue = make_api_request(form_data, '//query-continue/backlinks/@blcontinue')
|
212
209
|
titles += REXML::XPath.match(res, "//bl").map { |x| x.attributes["title"] }
|
213
210
|
end while blcontinue
|
214
211
|
titles
|
@@ -217,22 +214,31 @@ module MediaWiki
|
|
217
214
|
# Get a list of pages with matching content in given namespaces
|
218
215
|
#
|
219
216
|
# [key] Search key
|
220
|
-
# [namespaces] Array of namespace names to search (defaults to
|
221
|
-
# [limit]
|
217
|
+
# [namespaces] Array of namespace names to search (defaults to main only)
|
218
|
+
# [limit] Maximum number of hits to ask for (defaults to 500; note that Wikimedia Foundation wikis allow only 50 for normal users)
|
222
219
|
#
|
223
220
|
# Returns array of page titles (empty if no matches)
|
224
|
-
def search(key, namespaces=nil, limit
|
221
|
+
def search(key, namespaces=nil, limit=@options[:limit])
|
225
222
|
titles = []
|
223
|
+
offset = nil
|
224
|
+
in_progress = true
|
225
|
+
|
226
226
|
form_data = { 'action' => 'query',
|
227
227
|
'list' => 'search',
|
228
228
|
'srwhat' => 'text',
|
229
229
|
'srsearch' => key,
|
230
|
-
'srlimit' => limit
|
230
|
+
'srlimit' => limit
|
231
|
+
}
|
231
232
|
if namespaces
|
232
233
|
namespaces = [ namespaces ] unless namespaces.kind_of? Array
|
233
234
|
form_data['srnamespace'] = namespaces.map! do |ns| namespaces_by_prefix[ns] end.join('|')
|
234
235
|
end
|
235
|
-
|
236
|
+
begin
|
237
|
+
form_data['sroffset'] = offset if offset
|
238
|
+
res, offset = make_api_request(form_data, '//query-continue/search/@sroffset')
|
239
|
+
titles += REXML::XPath.match(res, "//p").map { |x| x.attributes["title"] }
|
240
|
+
end while offset
|
241
|
+
titles
|
236
242
|
end
|
237
243
|
|
238
244
|
# Upload a file, or get the status of pending uploads. Several
|
@@ -345,7 +351,7 @@ module MediaWiki
|
|
345
351
|
form_data['titles'] = "File:#{file_name_or_page_id}"
|
346
352
|
end
|
347
353
|
|
348
|
-
xml = make_api_request(form_data)
|
354
|
+
xml, dummy = make_api_request(form_data)
|
349
355
|
page = xml.elements["query/pages/page"]
|
350
356
|
if ! page or page.attributes["missing"]
|
351
357
|
nil
|
@@ -431,7 +437,7 @@ module MediaWiki
|
|
431
437
|
def semantic_query(query, params = [])
|
432
438
|
params << "format=list"
|
433
439
|
form_data = { 'action' => 'parse', 'prop' => 'text', 'text' => "{{#ask:#{query}|#{params.join('|')}}}" }
|
434
|
-
xml = make_api_request(form_data)
|
440
|
+
xml, dummy = make_api_request(form_data)
|
435
441
|
return xml.elements["parse/text"].text
|
436
442
|
end
|
437
443
|
|
@@ -440,7 +446,7 @@ module MediaWiki
|
|
440
446
|
# Fetch token (type 'delete', 'edit', 'import', 'move')
|
441
447
|
def get_token(type, page_titles)
|
442
448
|
form_data = {'action' => 'query', 'prop' => 'info', 'intoken' => type, 'titles' => page_titles}
|
443
|
-
res = make_api_request(form_data)
|
449
|
+
res, dummy = make_api_request(form_data)
|
444
450
|
token = res.elements["query/pages/page"].attributes[type + "token"]
|
445
451
|
raise "User is not permitted to perform this operation: #{type}" if token.nil?
|
446
452
|
token
|
@@ -448,7 +454,7 @@ module MediaWiki
|
|
448
454
|
|
449
455
|
def get_undelete_token(page_titles)
|
450
456
|
form_data = {'action' => 'query', 'list' => 'deletedrevs', 'prop' => 'info', 'drprop' => 'token', 'titles' => page_titles}
|
451
|
-
res = make_api_request(form_data)
|
457
|
+
res, dummy = make_api_request(form_data)
|
452
458
|
if res.elements["query/deletedrevs/page"]
|
453
459
|
token = res.elements["query/deletedrevs/page"].attributes["token"]
|
454
460
|
raise "User is not permitted to perform this operation: #{type}" if token.nil?
|
@@ -461,9 +467,11 @@ module MediaWiki
|
|
461
467
|
# Make generic request to API
|
462
468
|
#
|
463
469
|
# [form_data] hash or string of attributes to post
|
470
|
+
# [continue_xpath] XPath selector for query continue parameter
|
471
|
+
# [retry_count] Counter for retries
|
464
472
|
#
|
465
473
|
# Returns XML document
|
466
|
-
def make_api_request(form_data, retry_count=1)
|
474
|
+
def make_api_request(form_data, continue_xpath=nil, retry_count=1)
|
467
475
|
if form_data.kind_of? Hash
|
468
476
|
form_data['format'] = 'xml'
|
469
477
|
form_data['maxlag'] = @options[:maxlag]
|
@@ -473,7 +481,7 @@ module MediaWiki
|
|
473
481
|
if response.code == 503 and retry_count < @options[:retry_count]
|
474
482
|
log.warn("503 Service Unavailable: #{response.body}. Retry in #{@options[:retry_delay]} seconds.")
|
475
483
|
sleep @options[:retry_delay]
|
476
|
-
make_api_request(form_data, retry_count + 1)
|
484
|
+
make_api_request(form_data, continue_xpath, retry_count + 1)
|
477
485
|
end
|
478
486
|
# Check response for errors and return XML
|
479
487
|
raise "API error, bad response: #{response}" unless response.code >= 200 and response.code < 300
|
@@ -487,13 +495,13 @@ module MediaWiki
|
|
487
495
|
else raise "Login failed: " + login_result
|
488
496
|
end
|
489
497
|
end
|
490
|
-
|
498
|
+
continue = (continue_xpath and doc.elements['query-continue']) ? REXML::XPath.first(doc, continue_xpath).value : nil
|
499
|
+
return [doc, continue]
|
491
500
|
end
|
492
|
-
|
493
501
|
end
|
494
502
|
|
495
503
|
# Get API XML response
|
496
|
-
# If there are errors, raise exception
|
504
|
+
# If there are errors or warnings, raise exception
|
497
505
|
# Otherwise return XML root
|
498
506
|
def get_response(res)
|
499
507
|
begin
|
@@ -508,6 +516,9 @@ module MediaWiki
|
|
508
516
|
info = doc.elements["error"].attributes["info"]
|
509
517
|
raise "API error: code '#{code}', info '#{info}'"
|
510
518
|
end
|
519
|
+
if doc.elements["warnings"] and !@options[:ignorewarnings]
|
520
|
+
raise "API warning: #{doc.elements["warnings"].children.map {|e| e.text}.join(", ")}"
|
521
|
+
end
|
511
522
|
doc
|
512
523
|
end
|
513
524
|
end
|
data/lib/media_wiki.rb
CHANGED
data/mediawiki-gateway.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{mediawiki-gateway}
|
8
|
-
s.version = "0.3.0"
|
8
|
+
s.version = "0.3.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Jani Patokallio"]
|
12
|
-
s.date = %q{2011-01-
|
12
|
+
s.date = %q{2011-01-25}
|
13
13
|
s.description = %q{}
|
14
14
|
s.email = %q{jpatokal@iki.fi}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -51,6 +51,7 @@ Gem::Specification.new do |s|
|
|
51
51
|
"script/get_page.rb",
|
52
52
|
"script/import_xml.rb",
|
53
53
|
"script/run_fake_media_wiki.rb",
|
54
|
+
"script/search_content.rb",
|
54
55
|
"script/upload_commons.rb",
|
55
56
|
"script/upload_file.rb",
|
56
57
|
"spec/fake_media_wiki/api_pages.rb",
|
data/script/create_page.rb
CHANGED
@@ -7,7 +7,7 @@ require 'lib/media_wiki'
|
|
7
7
|
config = MediaWiki::Config.new ARGV
|
8
8
|
config.abort("Name of article is mandatory.") unless config.article
|
9
9
|
|
10
|
-
mw = MediaWiki::Gateway.new(config.url, Logger::DEBUG)
|
10
|
+
mw = MediaWiki::Gateway.new(config.url, { :loglevel => Logger::DEBUG } )
|
11
11
|
mw.login(config.user, config.pw)
|
12
12
|
content = ARGF.read.to_s
|
13
13
|
puts mw.create(config.article, content, {:overwrite => true, :summary => config.summary})
|
data/script/search_content.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Sample script for searching page contents in a Wiki
|
4
|
+
#
|
5
|
+
require 'lib/media_wiki'
|
6
|
+
|
7
|
+
config = MediaWiki::Config.new ARGV
|
8
|
+
config.abort("Please specify search key as article name (-a)") unless config.article
|
9
|
+
|
10
|
+
mw = MediaWiki::Gateway.new(config.url, { :loglevel => Logger::DEBUG } )
|
11
|
+
mw.login(config.user, config.pw)
|
12
|
+
puts mw.search(config.article, nil, 50)
|
data/script/upload_file.rb
CHANGED
@@ -7,7 +7,7 @@ require 'lib/media_wiki'
|
|
7
7
|
config = MediaWiki::Config.new(ARGV, "upload")
|
8
8
|
config.abort("Name of file to upload is mandatory.") unless ARGV[0]
|
9
9
|
|
10
|
-
mw = MediaWiki::Gateway.new(config.url, Logger::DEBUG)
|
10
|
+
mw = MediaWiki::Gateway.new(config.url, { :loglevel => Logger::DEBUG } )
|
11
11
|
mw.login(config.user, config.pw)
|
12
12
|
mw.upload(ARGV[0], {:target => config.target, :description => config.desc, :summary => config.summary})
|
13
13
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mediawiki-gateway
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
- 0
|
10
|
-
version: 0.3.0
|
9
|
+
- 1
|
10
|
+
version: 0.3.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jani Patokallio
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-01-
|
18
|
+
date: 2011-01-25 00:00:00 +11:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -132,6 +132,7 @@ files:
|
|
132
132
|
- script/get_page.rb
|
133
133
|
- script/import_xml.rb
|
134
134
|
- script/run_fake_media_wiki.rb
|
135
|
+
- script/search_content.rb
|
135
136
|
- script/upload_commons.rb
|
136
137
|
- script/upload_file.rb
|
137
138
|
- spec/fake_media_wiki/api_pages.rb
|