wikipedia-api-fork 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +22 -0
- data/README +1 -0
- data/lib/mediawiki.rb +181 -0
- data/lib/wikipedia.rb +28 -0
- data/test/sample.xml +85 -0
- data/test/test.rb +34 -0
- metadata +72 -0
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2008 Ben Hughes
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Utility for interacting with the MediaWiki API
|
data/lib/mediawiki.rb
ADDED
@@ -0,0 +1,181 @@
|
|
1
|
+
# This file contains magical incantations to interface with the MediaWiki
|
2
|
+
# API. This is very much a work in progress so don't count on it not changing
|
3
|
+
# (for the better).
|
4
|
+
#
|
5
|
+
# The MediaWiki class wraps all the functionality for general MediaWiki usage.
|
6
|
+
# You can also require wikipedia.rb to get the
|
7
|
+
# Wikipedia[link:/files/lib/wikipedia_rb.html] constant that wraps
|
8
|
+
# up the basic functionality.
|
9
|
+
#
|
10
|
+
# == Installation
|
11
|
+
# sudo gem install schleyfox-wikipedia-api --source=http://gems.github.com
|
12
|
+
#
|
13
|
+
# == Basic Usage
|
14
|
+
#
|
15
|
+
# For example, to get a page from Wikiquote
|
16
|
+
#
|
17
|
+
# require 'mediawiki'
|
18
|
+
# w = MediaWiki.new('http://en.wikiquote.org/w/api.php')
|
19
|
+
# w.find_by_title('Oscar Wilde')
|
20
|
+
#
|
21
|
+
# If you want Wikipedia, you can use the built in constant
|
22
|
+
#
|
23
|
+
# require 'wikipedia'
|
24
|
+
# Wikipedia.find_by_title('Oscar Wilde')
|
25
|
+
#
|
26
|
+
# == Source
|
27
|
+
#
|
28
|
+
# Check out the source on github http://github.com/schleyfox/wikipedia-api
|
29
|
+
|
30
|
+
['hpricot', 'cgi', 'open-uri', 'time'].each {|f| require f}
|
31
|
+
|
32
|
+
|
33
|
+
# The MediaWiki class allows one to interface with the MediaWiki API.
|
34
|
+
# Everything about it is incomplete and I promise that it will eat your kids
|
35
|
+
# and/or small furry woodland creatures. These things happen.
|
36
|
+
#
|
37
|
+
# == Usage
|
38
|
+
#
|
39
|
+
# To use, you construct a MediaWiki object for the site
|
40
|
+
#
|
41
|
+
# require 'mediawiki'
|
42
|
+
# example_wiki = MediaWiki.new("http://example.com/w/api.php")
|
43
|
+
#
|
44
|
+
# From here you can query based on title or pageid for individual pages or
|
45
|
+
# collections
|
46
|
+
#
|
47
|
+
# # By pageid
|
48
|
+
# page = example_wiki.find(10)
|
49
|
+
# page.title #=> "foo"
|
50
|
+
#
|
51
|
+
# # By title
|
52
|
+
# page = example_wiki.find_by_title("foo")
|
53
|
+
# page.pageid #=> 10
|
54
|
+
#
|
55
|
+
# # a collection by pageids
|
56
|
+
# result = example_wiki.find_by_pageids(10,11)
|
57
|
+
# result.pages.collect(&:title) #=> ["foo", "bar"]
|
58
|
+
#
|
59
|
+
# # a collection by titles
|
60
|
+
# result = example_wiki.find_by_titles("foo", "bar")
|
61
|
+
# result.pages.collect(&:pageid) #=> [10, 11]
|
62
|
+
#
|
63
|
+
class MediaWiki
  # Page-level properties the API can return (used for the prop= parameter).
  PROPS = [:info, :revisions, :links, :langlinks, :images, :imageinfo,
    :templates, :categories, :extlinks, :categoryinfo].freeze
  # Revision-level properties (used for the rvprop= parameter).
  RVPROPS = [:ids, :flags, :timestamp, :user, :size, :comment, :content].freeze

  # url:: full URL of the wiki's api.php endpoint,
  #       e.g. "http://en.wikiquote.org/w/api.php"
  def initialize(url)
    @url = url
  end

  # Find a single page by pageid. Returns a MediaWikiBase::Page (or nil if
  # the API returned no pages). An optional trailing options Hash is
  # forwarded (see handle_opts_hash for supported keys).
  def find(*opts)
    find_by_pageids(opts).pages.first
  end

  # Find the articles identified by one or more pageids. The last argument
  # may be an options Hash (:prop, :revids, :rvprop). Returns a
  # MediaWikiBase wrapping the API response.
  def find_by_pageids(*opts)
    page_ids, opts_qs = handle_options(opts)
    page_ids_qs = make_qs("pageids", page_ids)
    MediaWikiBase.new(make_url(opts_qs.push(page_ids_qs)))
  end

  # Same as find_by_titles but returns a single Page (or nil).
  def find_by_title(*opts)
    find_by_titles(opts).pages.first
  end

  # Find the articles identified by one or more titles. The last argument
  # may be an options Hash (:prop, :revids, :rvprop). Returns a
  # MediaWikiBase wrapping the API response.
  def find_by_titles(*opts)
    titles, opts_qs = handle_options(opts)
    titles_qs = make_qs("titles", titles)
    MediaWikiBase.new(make_url(opts_qs.push(titles_qs)))
  end

  # Wraps one API response: the raw Hpricot document (#xml) and the parsed
  # collection of Page objects (#pages).
  class MediaWikiBase

    attr_accessor :xml, :pages

    def initialize(url)
      @xml = get_xml(url)
      @pages = (@xml/:api/:query/:pages/:page).collect{|p| Page.new(p) }
    end

    # Page encapsulates the properties of a wiki page as returned by the API.
    class Page
      attr_accessor *PROPS
      attr_accessor :title, :pageid

      def initialize(page)
        @title = page.attributes['title']
        @pageid = page.attributes['pageid']
        @links = (page/:links/:pl).collect{|pl| pl.attributes['title']}
        @langlinks = (page/:langlinks/:ll).collect{|ll| ll.attributes['lang']}
        @images = (page/:images/:im).collect{|im| im.attributes['title']}
        @templates = (page/:templates/:tl).collect{|tl| tl.attributes['title']}
        @extlinks = (page/:extlinks/:el).collect{|el| el.inner_html}
        @revisions = (page/:revisions/:rev).collect{|rev| Revision.new(rev)}
      end
    end

    # A single page revision (one <rev> element of the response).
    class Revision
      attr_accessor *RVPROPS
      attr_accessor :revid

      def initialize(rev)
        @revid = rev.attributes['revid']
        @user = rev.attributes['user']
        # The timestamp attribute is only present when :timestamp was
        # requested via :rvprop; Time.parse(nil) raises, so guard it.
        ts = rev.attributes['timestamp']
        @timestamp = Time.parse(ts) if ts
        @comment = rev.attributes['comment']
        @content = rev.inner_html
      end
    end

    protected
    # Fetch and parse the XML response. Tests override this to read a
    # local fixture instead of the network.
    def get_xml(url)
      Hpricot.XML(open(url))
    end
  end


  protected
  # Build the final request URL from the base api.php URL plus query-string
  # fragments. Nested arrays are flattened by Array#join.
  def make_url(*opts)
    @url + "?" + (["action=query", "format=xml"] + opts).join('&')
  end

  # Split a finder's argument list into [ids_or_titles, option_fragments].
  #
  # BUG FIX: the previous version used `opts.delete_if`, which returns the
  # receiver itself, so `(opts - arr)` was always empty and any options
  # Hash was silently dropped. We also flatten first, because find and
  # find_by_title forward their splat array as a single nested argument,
  # which would otherwise hide a trailing Hash inside an inner array.
  def handle_options(opts)
    opts = opts.flatten
    hash = opts.find{|o| o.is_a?(Hash) }
    arr = opts.reject{|o| o.is_a?(Hash) }
    [arr, handle_opts_hash(hash)]
  end

  # Turn the options Hash into an array of query-string fragments.
  # Unknown :prop/:rvprop entries are filtered out against PROPS/RVPROPS;
  # :prop defaults to all of PROPS. The caller's hash is not mutated.
  def handle_opts_hash(opts)
    opts = opts ? opts.dup : {}
    res = []

    res << make_qs("prop", (opts[:prop] || PROPS) & PROPS)

    res << make_qs("revids", opts[:revids]) if opts[:revids]

    res << make_qs("rvprop", opts[:rvprop] & RVPROPS) if opts[:rvprop]

    res
  end

  # URL-encode a "name=a|b|c" query-string fragment.
  def make_qs(name, collection)
    "#{name}=#{CGI.escape(collection.join('|'))}"
  end

end
|
180
|
+
|
181
|
+
|
data/lib/wikipedia.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'mediawiki'

# The Wikipedia constant allows the use of Wikipedia's Query API from Ruby.
# The wrapping is incomplete and the interface will be cleaned up as work is
# done.
#
# == Usage
#
# The simplest case is just finding pages by title. The Wikipedia API allows
# requests to be on multiple titles or ids, so this wrapping returns an array of
# pages
#
#   require 'wikipedia'
#   page = Wikipedia.find_by_titles('Foo').pages.first
#   page.title #=> "Foo"
#
# Pages can also be found based on pageid
#
#   page = Wikipedia.find_by_pageids(10).pages.first
#   page.title #=> "AccessibleComputing"
#
# Further API options can be specified in the optional second parameter to
# find_by_*. This can be used to limit the fetching of unnecessary data
#
#   page = Wikipedia.find_by_titles('Foo', :prop => [:langlinks]).pages.first
#   page.langlinks #=> ["da", "fi", "it", "no", "sl", "vi"]
#
# NOTE: Wikipedia serves the API over HTTPS only; plain-HTTP requests are
# redirected, and open-uri refuses to follow http->https redirects, so the
# endpoint must be the https one.
Wikipedia = MediaWiki.new("https://en.wikipedia.org/w/api.php")
|
data/test/sample.xml
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<api>
|
3
|
+
<query>
|
4
|
+
<pages>
|
5
|
+
<page lastrevid="208372148" touched="2008-04-29T06:15:08Z" title="Foo" ns="0" length="3945" counter="0" pageid="9132808">
|
6
|
+
<revisions>
|
7
|
+
<rev timestamp="2008-04-26T19:07:10Z" user="Andreas Kaufmann" revid="208372148" comment="Removed category &quot;Computer programming&quot;; Quick-adding category &quot;Variable (computer programming)&quot; (using [[WP:HOTCAT|HotCat]])" minor=""/>
|
8
|
+
</revisions>
|
9
|
+
<links>
|
10
|
+
<pl title="2007" ns="0"/>
|
11
|
+
<pl title="Abstract entity" ns="0"/>
|
12
|
+
<pl title="Abstraction (computer science)" ns="0"/>
|
13
|
+
<pl title="Algebra" ns="0"/>
|
14
|
+
<pl title="Apt:foo" ns="0"/>
|
15
|
+
<pl title="Backronym" ns="0"/>
|
16
|
+
<pl title="Bar (computer science)" ns="0"/>
|
17
|
+
<pl title="Bill Gates" ns="0"/>
|
18
|
+
<pl title="Canonical" ns="0"/>
|
19
|
+
<pl title="Commands" ns="0"/>
|
20
|
+
<pl title="Computer programming" ns="0"/>
|
21
|
+
<pl title="Computer science" ns="0"/>
|
22
|
+
<pl title="Cruft" ns="0"/>
|
23
|
+
<pl title="Data" ns="0"/>
|
24
|
+
<pl title="Eric S. Raymond" ns="0"/>
|
25
|
+
<pl title="FUBAR" ns="0"/>
|
26
|
+
<pl title="Foo Camp" ns="0"/>
|
27
|
+
<pl title="Foo Fighter" ns="0"/>
|
28
|
+
<pl title="Foo Fighters" ns="0"/>
|
29
|
+
<pl title="Foobar" ns="0"/>
|
30
|
+
<pl title="Functions" ns="0"/>
|
31
|
+
<pl title="Hacker convention" ns="0"/>
|
32
|
+
<pl title="Internet Engineering Task Force" ns="0"/>
|
33
|
+
<pl title="Kludge" ns="0"/>
|
34
|
+
<pl title="List of Daffy Duck cartoons" ns="0"/>
|
35
|
+
<pl title="MIT Press" ns="0"/>
|
36
|
+
<pl title="Metasyntactic variable" ns="0"/>
|
37
|
+
<pl title="Neologism" ns="0"/>
|
38
|
+
<pl title="November 5" ns="0"/>
|
39
|
+
<pl title="O'Reilly Media" ns="0"/>
|
40
|
+
<pl title="Perl" ns="0"/>
|
41
|
+
<pl title="Pogo (comics)" ns="0"/>
|
42
|
+
<pl title="Pseudocode" ns="0"/>
|
43
|
+
<pl title="Request for Comments" ns="0"/>
|
44
|
+
<pl title="Smokey Stover" ns="0"/>
|
45
|
+
<pl title="Subroutine" ns="0"/>
|
46
|
+
<pl title="United States v. Microsoft" ns="0"/>
|
47
|
+
<pl title="Variable" ns="0"/>
|
48
|
+
<pl title="Variables" ns="0"/>
|
49
|
+
<pl title="Web Services Interoperability" ns="0"/>
|
50
|
+
<pl title="Wiktionary" ns="0"/>
|
51
|
+
</links>
|
52
|
+
<langlinks>
|
53
|
+
<ll lang="da">Foo (data)</ll>
|
54
|
+
<ll lang="fi">Foo</ll>
|
55
|
+
<ll lang="it">Foo</ll>
|
56
|
+
<ll lang="no">Foo</ll>
|
57
|
+
<ll lang="sl">Foo</ll>
|
58
|
+
<ll lang="vi">Foo</ll>
|
59
|
+
</langlinks>
|
60
|
+
<images>
|
61
|
+
<im title="Image:Wiktionary-logo-en.svg" ns="6"/>
|
62
|
+
</images>
|
63
|
+
<templates>
|
64
|
+
<tl title="Template:Cite book" ns="10"/>
|
65
|
+
<tl title="Template:Cite web" ns="10"/>
|
66
|
+
<tl title="Template:De icon" ns="10"/>
|
67
|
+
<tl title="Template:Languageicon" ns="10"/>
|
68
|
+
<tl title="Template:Reflist" ns="10"/>
|
69
|
+
<tl title="Template:Wiktionary" ns="10"/>
|
70
|
+
</templates>
|
71
|
+
<categories>
|
72
|
+
<cl title="Category:Placeholder names" ns="14"/>
|
73
|
+
<cl title="Category:Variable (computer programming)" ns="14"/>
|
74
|
+
</categories>
|
75
|
+
<extlinks>
|
76
|
+
<el>http://books.google.com/books?id=POlUJW3Z9McC&pg=PA5&dq=foo+jargon&ei=GnIvR8PwGJiSpgK1qIT6CQ&ie=ISO-8859-1&sig=hIE0I8TtPGKUbSU-wgDTm4hQ8ig#PPA4,M1</el>
|
77
|
+
<el>http://foo-magazin.de/</el>
|
78
|
+
<el>http://tools.ietf.org/html/rfc3092</el>
|
79
|
+
<el>http://www.ietf.org/rfc/rfc3092.txt</el>
|
80
|
+
<el>http://www.news.com/Microsoft-ploy-to-block-Sun-exposed/2100-1001_3-912906.html</el>
|
81
|
+
</extlinks>
|
82
|
+
</page>
|
83
|
+
</pages>
|
84
|
+
</query>
|
85
|
+
</api>
|
data/test/test.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'rubygems'
|
3
|
+
require 'shoulda'
|
4
|
+
require File.dirname(__FILE__) + '/../lib/mediawiki'
|
5
|
+
|
6
|
+
# Reopen MediaWiki::MediaWikiBase and stub out the fetch step so the tests
# run offline: instead of requesting `url`, parse the canned API response in
# test/sample.xml (a captured query for the "Foo" page).
class MediaWiki
  class MediaWikiBase
    def get_xml(url)
      # `url` is deliberately ignored; the fixture stands in for any request.
      Hpricot.XML(open(File.dirname(__FILE__) + '/sample.xml'))
    end
  end
end
|
13
|
+
|
14
|
+
# Exercises the MediaWiki query interface against the canned fixture
# (the network fetch is stubbed out earlier in this file).
class MediaWikiTest < Test::Unit::TestCase
  def setup
    # Any URL works here: get_xml is stubbed to read test/sample.xml,
    # so no request is ever made to mock.com.
    @mw = MediaWiki.new("http://mock.com/api.php")
  end

  context "MediaWiki interface" do
    # These assert truthiness only: each finder should return a non-nil
    # page/result object built from the fixture, regardless of arguments.
    should("find article by id"){ assert @mw.find(10) }
    should("find article by title"){ assert @mw.find_by_title("Foo") }
    should("find articles by ids"){ assert @mw.find_by_pageids(10,11) }
    should("find articles by titles"){ assert @mw.find_by_titles("Foo","Bar") }
  end

  context "MediaWiki base" do
    # The result wrapper should expose both the raw parsed XML document
    # and the collection of Page objects.
    should("have xml"){ assert @mw.find_by_titles("Foo").xml }
    should("have pages"){ assert @mw.find_by_titles("Foo").pages }
  end

  # TODO(review): page-level assertions (title, links, langlinks, revisions)
  # are missing; this context is an empty placeholder.
  context "MediaWiki pages" do
  end
end
|
34
|
+
|
metadata
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wikipedia-api-fork
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 25
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 1
|
10
|
+
version: 0.1.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Ben Hughes
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2008-05-06 00:00:00 -04:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description:
|
23
|
+
email: ben@pixelmachine.org
|
24
|
+
executables: []
|
25
|
+
|
26
|
+
extensions: []
|
27
|
+
|
28
|
+
extra_rdoc_files: []
|
29
|
+
|
30
|
+
files:
|
31
|
+
- README
|
32
|
+
- LICENSE
|
33
|
+
- lib/mediawiki.rb
|
34
|
+
- lib/wikipedia.rb
|
35
|
+
- test/sample.xml
|
36
|
+
- test/test.rb
|
37
|
+
has_rdoc: true
|
38
|
+
homepage: http://github.com/schleyfox/wikipedia-api/
|
39
|
+
licenses: []
|
40
|
+
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options: []
|
43
|
+
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
hash: 3
|
52
|
+
segments:
|
53
|
+
- 0
|
54
|
+
version: "0"
|
55
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
hash: 3
|
61
|
+
segments:
|
62
|
+
- 0
|
63
|
+
version: "0"
|
64
|
+
requirements: []
|
65
|
+
|
66
|
+
rubyforge_project:
|
67
|
+
rubygems_version: 1.6.2
|
68
|
+
signing_key:
|
69
|
+
specification_version: 3
|
70
|
+
summary: Wikipedia-API is a ruby wrapper for the MediaWiki API
|
71
|
+
test_files: []
|
72
|
+
|