mediawiki-gateway 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +1 -0
- data/config/hosts.yml +8 -8
- data/lib/media_wiki/gateway.rb +33 -22
- data/lib/media_wiki.rb +1 -1
- data/mediawiki-gateway.gemspec +3 -2
- data/script/create_page.rb +1 -1
- data/script/search_content.rb +12 -0
- data/script/upload_file.rb +1 -1
- metadata +5 -4
data/README
CHANGED
@@ -4,6 +4,7 @@ A Ruby framework for MediaWiki API manipulation. Features out of the box:
|
|
4
4
|
|
5
5
|
* Simple, elegant syntax for common operations
|
6
6
|
* Handles login, edit, move etc tokens for you
|
7
|
+
* List, search operations work around API limits to fetch all results
|
7
8
|
* Support for maxlag detection and automated retries on 503
|
8
9
|
* Integrated logging
|
9
10
|
* Tested up to MediaWiki 1.16
|
data/config/hosts.yml
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
---
|
2
2
|
en-wp:
|
3
3
|
url: http://en.wikipedia.org/w/api.php
|
4
|
-
pw:
|
5
|
-
user:
|
4
|
+
pw: password
|
5
|
+
user: username
|
6
6
|
commons:
|
7
7
|
url: http://commons.wikimedia.org/w/api.php
|
8
|
-
pw:
|
9
|
-
user:
|
8
|
+
pw: password
|
9
|
+
user: username
|
10
10
|
en-wt:
|
11
11
|
url: http://wikitravel.org/wiki/en/api.php
|
12
|
-
pw:
|
13
|
-
user:
|
12
|
+
pw: password
|
13
|
+
user: username
|
14
14
|
local:
|
15
15
|
url: http://localhost/w/api.php
|
16
|
-
pw:
|
17
|
-
user:
|
16
|
+
pw: password
|
17
|
+
user: username
|
data/lib/media_wiki/gateway.rb
CHANGED
@@ -59,7 +59,7 @@ module MediaWiki
|
|
59
59
|
# Returns content of page as string, nil if the page does not exist
|
60
60
|
def get(page_title)
|
61
61
|
form_data = {'action' => 'query', 'prop' => 'revisions', 'rvprop' => 'content', 'titles' => page_title}
|
62
|
-
page = make_api_request(form_data).elements["query/pages/page"]
|
62
|
+
page = make_api_request(form_data).first.elements["query/pages/page"]
|
63
63
|
if ! page or page.attributes["missing"]
|
64
64
|
nil
|
65
65
|
else
|
@@ -86,7 +86,7 @@ module MediaWiki
|
|
86
86
|
options.keys.each{|opt| raise ArgumentError.new("Unknown option '#{opt}'") unless valid_options.include?(opt.to_s)}
|
87
87
|
|
88
88
|
rendered = nil
|
89
|
-
parsed = make_api_request(form_data).elements["parse"]
|
89
|
+
parsed = make_api_request(form_data).first.elements["parse"]
|
90
90
|
if parsed.attributes["revid"] != '0'
|
91
91
|
rendered = parsed.elements["text"].text.gsub(/<!--(.|\s)*?-->/, '')
|
92
92
|
# OPTIMIZE: unifiy the keys in +options+ like symbolize_keys! but w/o
|
@@ -158,8 +158,7 @@ module MediaWiki
|
|
158
158
|
token = get_undelete_token(title)
|
159
159
|
if token
|
160
160
|
form_data = {'action' => 'undelete', 'title' => title, 'token' => token }
|
161
|
-
|
162
|
-
xml.elements["undelete"].attributes["revisions"].to_i
|
161
|
+
make_api_request(form_data).first.elements["undelete"].attributes["revisions"].to_i
|
163
162
|
else
|
164
163
|
0 # No revisions to undelete
|
165
164
|
end
|
@@ -183,8 +182,7 @@ module MediaWiki
|
|
183
182
|
'apprefix' => key,
|
184
183
|
'aplimit' => @options[:limit],
|
185
184
|
'apnamespace' => namespace}
|
186
|
-
res = make_api_request(form_data)
|
187
|
-
apfrom = res.elements['query-continue'] ? res.elements['query-continue/allpages'].attributes['apfrom'] : nil
|
185
|
+
res, apfrom = make_api_request(form_data, '//query-continue/allpages/@apfrom')
|
188
186
|
titles += REXML::XPath.match(res, "//p").map { |x| x.attributes["title"] }
|
189
187
|
end while apfrom
|
190
188
|
titles
|
@@ -207,8 +205,7 @@ module MediaWiki
|
|
207
205
|
'blfilterredir' => filter,
|
208
206
|
'bllimit' => @options[:limit] }
|
209
207
|
form_data['blcontinue'] = blcontinue if blcontinue
|
210
|
-
res = make_api_request(form_data)
|
211
|
-
blcontinue = res.elements['query-continue'] ? res.elements['query-continue/backlinks'].attributes['blcontinue'] : nil
|
208
|
+
res, blcontinue = make_api_request(form_data, '//query-continue/backlinks/@blcontinue')
|
212
209
|
titles += REXML::XPath.match(res, "//bl").map { |x| x.attributes["title"] }
|
213
210
|
end while blcontinue
|
214
211
|
titles
|
@@ -217,22 +214,31 @@ module MediaWiki
|
|
217
214
|
# Get a list of pages with matching content in given namespaces
|
218
215
|
#
|
219
216
|
# [key] Search key
|
220
|
-
# [namespaces] Array of namespace names to search (defaults to
|
221
|
-
# [limit]
|
217
|
+
# [namespaces] Array of namespace names to search (defaults to main only)
|
218
|
+
# [limit] Maximum number of hits to ask for (defaults to 500; note that Wikimedia Foundation wikis allow only 50 for normal users)
|
222
219
|
#
|
223
220
|
# Returns array of page titles (empty if no matches)
|
224
|
-
def search(key, namespaces=nil, limit
|
221
|
+
def search(key, namespaces=nil, limit=@options[:limit])
|
225
222
|
titles = []
|
223
|
+
offset = nil
|
224
|
+
in_progress = true
|
225
|
+
|
226
226
|
form_data = { 'action' => 'query',
|
227
227
|
'list' => 'search',
|
228
228
|
'srwhat' => 'text',
|
229
229
|
'srsearch' => key,
|
230
|
-
'srlimit' => limit
|
230
|
+
'srlimit' => limit
|
231
|
+
}
|
231
232
|
if namespaces
|
232
233
|
namespaces = [ namespaces ] unless namespaces.kind_of? Array
|
233
234
|
form_data['srnamespace'] = namespaces.map! do |ns| namespaces_by_prefix[ns] end.join('|')
|
234
235
|
end
|
235
|
-
|
236
|
+
begin
|
237
|
+
form_data['sroffset'] = offset if offset
|
238
|
+
res, offset = make_api_request(form_data, '//query-continue/search/@sroffset')
|
239
|
+
titles += REXML::XPath.match(res, "//p").map { |x| x.attributes["title"] }
|
240
|
+
end while offset
|
241
|
+
titles
|
236
242
|
end
|
237
243
|
|
238
244
|
# Upload a file, or get the status of pending uploads. Several
|
@@ -345,7 +351,7 @@ module MediaWiki
|
|
345
351
|
form_data['titles'] = "File:#{file_name_or_page_id}"
|
346
352
|
end
|
347
353
|
|
348
|
-
xml = make_api_request(form_data)
|
354
|
+
xml, dummy = make_api_request(form_data)
|
349
355
|
page = xml.elements["query/pages/page"]
|
350
356
|
if ! page or page.attributes["missing"]
|
351
357
|
nil
|
@@ -431,7 +437,7 @@ module MediaWiki
|
|
431
437
|
def semantic_query(query, params = [])
|
432
438
|
params << "format=list"
|
433
439
|
form_data = { 'action' => 'parse', 'prop' => 'text', 'text' => "{{#ask:#{query}|#{params.join('|')}}}" }
|
434
|
-
xml = make_api_request(form_data)
|
440
|
+
xml, dummy = make_api_request(form_data)
|
435
441
|
return xml.elements["parse/text"].text
|
436
442
|
end
|
437
443
|
|
@@ -440,7 +446,7 @@ module MediaWiki
|
|
440
446
|
# Fetch token (type 'delete', 'edit', 'import', 'move')
|
441
447
|
def get_token(type, page_titles)
|
442
448
|
form_data = {'action' => 'query', 'prop' => 'info', 'intoken' => type, 'titles' => page_titles}
|
443
|
-
res = make_api_request(form_data)
|
449
|
+
res, dummy = make_api_request(form_data)
|
444
450
|
token = res.elements["query/pages/page"].attributes[type + "token"]
|
445
451
|
raise "User is not permitted to perform this operation: #{type}" if token.nil?
|
446
452
|
token
|
@@ -448,7 +454,7 @@ module MediaWiki
|
|
448
454
|
|
449
455
|
def get_undelete_token(page_titles)
|
450
456
|
form_data = {'action' => 'query', 'list' => 'deletedrevs', 'prop' => 'info', 'drprop' => 'token', 'titles' => page_titles}
|
451
|
-
res = make_api_request(form_data)
|
457
|
+
res, dummy = make_api_request(form_data)
|
452
458
|
if res.elements["query/deletedrevs/page"]
|
453
459
|
token = res.elements["query/deletedrevs/page"].attributes["token"]
|
454
460
|
raise "User is not permitted to perform this operation: #{type}" if token.nil?
|
@@ -461,9 +467,11 @@ module MediaWiki
|
|
461
467
|
# Make generic request to API
|
462
468
|
#
|
463
469
|
# [form_data] hash or string of attributes to post
|
470
|
+
# [continue_xpath] XPath selector for query continue parameter
|
471
|
+
# [retry_count] Counter for retries
|
464
472
|
#
|
465
473
|
# Returns XML document
|
466
|
-
def make_api_request(form_data, retry_count=1)
|
474
|
+
def make_api_request(form_data, continue_xpath=nil, retry_count=1)
|
467
475
|
if form_data.kind_of? Hash
|
468
476
|
form_data['format'] = 'xml'
|
469
477
|
form_data['maxlag'] = @options[:maxlag]
|
@@ -473,7 +481,7 @@ module MediaWiki
|
|
473
481
|
if response.code == 503 and retry_count < @options[:retry_count]
|
474
482
|
log.warn("503 Service Unavailable: #{response.body}. Retry in #{@options[:retry_delay]} seconds.")
|
475
483
|
sleep @options[:retry_delay]
|
476
|
-
make_api_request(form_data, retry_count + 1)
|
484
|
+
make_api_request(form_data, continue_xpath, retry_count + 1)
|
477
485
|
end
|
478
486
|
# Check response for errors and return XML
|
479
487
|
raise "API error, bad response: #{response}" unless response.code >= 200 and response.code < 300
|
@@ -487,13 +495,13 @@ module MediaWiki
|
|
487
495
|
else raise "Login failed: " + login_result
|
488
496
|
end
|
489
497
|
end
|
490
|
-
|
498
|
+
continue = (continue_xpath and doc.elements['query-continue']) ? REXML::XPath.first(doc, continue_xpath).value : nil
|
499
|
+
return [doc, continue]
|
491
500
|
end
|
492
|
-
|
493
501
|
end
|
494
502
|
|
495
503
|
# Get API XML response
|
496
|
-
# If there are errors, raise exception
|
504
|
+
# If there are errors or warnings, raise exception
|
497
505
|
# Otherwise return XML root
|
498
506
|
def get_response(res)
|
499
507
|
begin
|
@@ -508,6 +516,9 @@ module MediaWiki
|
|
508
516
|
info = doc.elements["error"].attributes["info"]
|
509
517
|
raise "API error: code '#{code}', info '#{info}'"
|
510
518
|
end
|
519
|
+
if doc.elements["warnings"] and !@options[:ignorewarnings]
|
520
|
+
raise "API warning: #{doc.elements["warnings"].children.map {|e| e.text}.join(", ")}"
|
521
|
+
end
|
511
522
|
doc
|
512
523
|
end
|
513
524
|
end
|
data/lib/media_wiki.rb
CHANGED
data/mediawiki-gateway.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{mediawiki-gateway}
|
8
|
-
s.version = "0.3.0"
|
8
|
+
s.version = "0.3.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Jani Patokallio"]
|
12
|
-
s.date = %q{2011-01-
|
12
|
+
s.date = %q{2011-01-25}
|
13
13
|
s.description = %q{}
|
14
14
|
s.email = %q{jpatokal@iki.fi}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -51,6 +51,7 @@ Gem::Specification.new do |s|
|
|
51
51
|
"script/get_page.rb",
|
52
52
|
"script/import_xml.rb",
|
53
53
|
"script/run_fake_media_wiki.rb",
|
54
|
+
"script/search_content.rb",
|
54
55
|
"script/upload_commons.rb",
|
55
56
|
"script/upload_file.rb",
|
56
57
|
"spec/fake_media_wiki/api_pages.rb",
|
data/script/create_page.rb
CHANGED
@@ -7,7 +7,7 @@ require 'lib/media_wiki'
|
|
7
7
|
config = MediaWiki::Config.new ARGV
|
8
8
|
config.abort("Name of article is mandatory.") unless config.article
|
9
9
|
|
10
|
-
mw = MediaWiki::Gateway.new(config.url, Logger::DEBUG)
|
10
|
+
mw = MediaWiki::Gateway.new(config.url, { :loglevel => Logger::DEBUG } )
|
11
11
|
mw.login(config.user, config.pw)
|
12
12
|
content = ARGF.read.to_s
|
13
13
|
puts mw.create(config.article, content, {:overwrite => true, :summary => config.summary})
|
@@ -0,0 +1,12 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Sample script for searching page contents in a Wiki
|
4
|
+
#
|
5
|
+
require 'lib/media_wiki'
|
6
|
+
|
7
|
+
config = MediaWiki::Config.new ARGV
|
8
|
+
config.abort("Please specify search key as article name (-a)") unless config.article
|
9
|
+
|
10
|
+
mw = MediaWiki::Gateway.new(config.url, { :loglevel => Logger::DEBUG } )
|
11
|
+
mw.login(config.user, config.pw)
|
12
|
+
puts mw.search(config.article, nil, 50)
|
data/script/upload_file.rb
CHANGED
@@ -7,7 +7,7 @@ require 'lib/media_wiki'
|
|
7
7
|
config = MediaWiki::Config.new(ARGV, "upload")
|
8
8
|
config.abort("Name of file to upload is mandatory.") unless ARGV[0]
|
9
9
|
|
10
|
-
mw = MediaWiki::Gateway.new(config.url, Logger::DEBUG)
|
10
|
+
mw = MediaWiki::Gateway.new(config.url, { :loglevel => Logger::DEBUG } )
|
11
11
|
mw.login(config.user, config.pw)
|
12
12
|
mw.upload(ARGV[0], {:target => config.target, :description => config.desc, :summary => config.summary})
|
13
13
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mediawiki-gateway
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
- 0
|
10
|
-
version: 0.3.0
|
9
|
+
- 1
|
10
|
+
version: 0.3.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jani Patokallio
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-01-
|
18
|
+
date: 2011-01-25 00:00:00 +11:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -132,6 +132,7 @@ files:
|
|
132
132
|
- script/get_page.rb
|
133
133
|
- script/import_xml.rb
|
134
134
|
- script/run_fake_media_wiki.rb
|
135
|
+
- script/search_content.rb
|
135
136
|
- script/upload_commons.rb
|
136
137
|
- script/upload_file.rb
|
137
138
|
- spec/fake_media_wiki/api_pages.rb
|