wikipedia-api-fork 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +22 -0
- data/README +1 -0
- data/lib/mediawiki.rb +181 -0
- data/lib/wikipedia.rb +28 -0
- data/test/sample.xml +85 -0
- data/test/test.rb +34 -0
- metadata +72 -0
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2008 Ben Hughes
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Utility for interacting with the MediaWiki API
|
data/lib/mediawiki.rb
ADDED
@@ -0,0 +1,181 @@
|
|
1
|
+
# This file contains magical incantations to interface with the MediaWiki
|
2
|
+
# API. This is very much a work in progress so don't count on it not changing
|
3
|
+
# (for the better).
|
4
|
+
#
|
5
|
+
# The MediaWiki class wraps all the functionality for general MediaWiki usage.
|
6
|
+
# You can also require wikipedia.rb to get the
|
7
|
+
# Wikipedia[link:/files/lib/wikipedia_rb.html] constant that wraps
|
8
|
+
# up the basic functionality.
|
9
|
+
#
|
10
|
+
# == Installation
|
11
|
+
# sudo gem install schleyfox-wikipedia-api --source=http://gems.github.com
|
12
|
+
#
|
13
|
+
# == Basic Usage
|
14
|
+
#
|
15
|
+
# For example, to get a page from Wikiquote
|
16
|
+
#
|
17
|
+
# require 'mediawiki'
|
18
|
+
# w = MediaWiki.new('http://en.wikiquote.org/w/api.php')
|
19
|
+
# w.find_by_title('Oscar Wilde')
|
20
|
+
#
|
21
|
+
# If you want Wikipedia, you can use the built in constant
|
22
|
+
#
|
23
|
+
# require 'wikipedia'
|
24
|
+
# Wikipedia.find_by_title('Oscar Wilde')
|
25
|
+
#
|
26
|
+
# == Source
|
27
|
+
#
|
28
|
+
# Check out the source on github http://github.com/schleyfox/wikipedia-api
|
29
|
+
|
30
|
+
['hpricot', 'cgi', 'open-uri', 'time'].each {|f| require f}
|
31
|
+
|
32
|
+
|
33
|
+
# The MediaWiki class allows one to interface with the MediaWiki API.
|
34
|
+
# Everything about it is incomplete and I promise that it will eat your kids
|
35
|
+
# and/or small furry woodland creatures. These things happen.
|
36
|
+
#
|
37
|
+
# == Usage
|
38
|
+
#
|
39
|
+
# To use, you construct a MediaWiki object for the site
|
40
|
+
#
|
41
|
+
# require 'mediawiki'
|
42
|
+
# example_wiki = MediaWiki.new("http://example.com/w/api.php")
|
43
|
+
#
|
44
|
+
# From here you can query based on title or pageid for individual pages or
|
45
|
+
# collections
|
46
|
+
#
|
47
|
+
# # By pageid
|
48
|
+
# page = example_wiki.find(10)
|
49
|
+
# page.title #=> "foo"
|
50
|
+
#
|
51
|
+
# # By title
|
52
|
+
# page = example_wiki.find_by_title("foo")
|
53
|
+
# page.pageid #=> 10
|
54
|
+
#
|
55
|
+
# # a collection by pageids
|
56
|
+
# result = example_wiki.find_by_pageids(10,11)
|
57
|
+
# result.pages.collect(&:title) #=> ["foo", "bar"]
|
58
|
+
#
|
59
|
+
# # a collection by titles
|
60
|
+
# result = example_wiki.find_by_titles("foo", "bar")
|
61
|
+
# result.pages.collect(&:pageid) #=> [10, 11]
|
62
|
+
#
|
63
|
+
class MediaWiki
  # Page properties that may be requested from the API via prop=.
  PROPS = [:info, :revisions, :links, :langlinks, :images, :imageinfo,
    :templates, :categories, :extlinks, :categoryinfo]
  # Revision properties that may be requested via rvprop=.
  RVPROPS = [:ids, :flags, :timestamp, :user, :size, :comment, :content]

  # url:: full URL of a MediaWiki api.php endpoint,
  #       e.g. "http://en.wikiquote.org/w/api.php"
  def initialize(url)
    @url = url
  end

  # Find a single page by pageid. An optional trailing Hash supplies extra
  # API options (e.g. :prop => [:langlinks]).
  def find(*opts)
    # NOTE: must re-splat, otherwise the args arrive as a nested Array and
    # any trailing options Hash is never recognized by handle_options.
    find_by_pageids(*opts).pages.first
  end

  # Find the articles identified by the given pageids; returns a
  # MediaWikiBase result wrapping all matched pages.
  def find_by_pageids(*opts)
    page_ids, opts_qs = handle_options(opts)
    page_ids_qs = make_qs("pageids", page_ids)
    MediaWikiBase.new(make_url(opts_qs.push(page_ids_qs)))
  end

  # Same as find_by_titles but returns a single page.
  def find_by_title(*opts)
    find_by_titles(*opts).pages.first
  end

  # Find the articles identified by the given titles; returns a
  # MediaWikiBase result wrapping all matched pages.
  def find_by_titles(*opts)
    titles, opts_qs = handle_options(opts)
    titles_qs = make_qs("titles", titles)
    MediaWikiBase.new(make_url(opts_qs.push(titles_qs)))
  end

  # Wraps one API response: fetches the XML for a query URL and builds
  # Page objects from the <page> elements.
  class MediaWikiBase

    attr_accessor :xml, :pages

    def initialize(url)
      @xml = get_xml(url)
      @pages = (@xml/:api/:query/:pages/:page).collect{|p| Page.new(p) }
    end

    # Page encapsulates the properties of a wiki page as returned by the
    # query API (title, pageid, links, langlinks, images, templates,
    # extlinks and revisions).
    class Page
      attr_accessor *PROPS
      attr_accessor :title, :pageid

      def initialize(page)
        @title = page.attributes['title']
        @pageid = page.attributes['pageid']
        @links = (page/:links/:pl).collect{|pl| pl.attributes['title']}
        @langlinks = (page/:langlinks/:ll).collect{|ll| ll.attributes['lang']}
        @images = (page/:images/:im).collect{|im| im.attributes['title']}
        @templates = (page/:templates/:tl).collect{|tl| tl.attributes['title']}
        @extlinks = (page/:extlinks/:el).collect{|el| el.inner_html}
        @revisions = (page/:revisions/:rev).collect{|rev| Revision.new(rev)}
      end
    end

    # Revision encapsulates one <rev> element: id, author, timestamp,
    # edit comment and (when rvprop=content was requested) the wikitext.
    class Revision
      attr_accessor *RVPROPS
      attr_accessor :revid

      def initialize(rev)
        @revid = rev.attributes['revid']
        @user = rev.attributes['user']
        # Guard against a missing attribute: Time.parse(nil) raises.
        # Requires the stdlib 'time' extension for Time.parse.
        ts = rev.attributes['timestamp']
        @timestamp = Time.parse(ts) if ts
        @comment = rev.attributes['comment']
        @content = rev.inner_html
      end
    end

    protected
    # Fetch and parse the XML document at url (open-uri + Hpricot).
    def get_xml(url)
      Hpricot.XML(open(url))
    end
  end


  protected
  # Build the final query URL: base + action/format plus any extra
  # querystring fragments.
  def make_url(*opts)
    @url + "?" + (["action=query", "format=xml"] + opts).join('&')
  end

  # Split a find_* argument list into [positional ids/titles, option
  # querystrings]. The options Hash, if any, is the (single) Hash element.
  #
  # NOTE: the previous implementation used delete_if, which mutates and
  # returns the receiver itself, so the "remaining" Hash was always lost
  # and user options were silently ignored.
  def handle_options(opts)
    hash = opts.find { |o| o.is_a?(Hash) }
    arr  = opts.reject { |o| o.is_a?(Hash) }
    [arr, handle_opts_hash(hash)]
  end

  # Translate an options Hash into an Array of querystring fragments.
  # Unknown prop/rvprop symbols are filtered out; prop defaults to PROPS.
  # Does not mutate the caller's Hash.
  def handle_opts_hash(opts)
    opts ||= {}
    res = []

    props = (opts[:prop] || PROPS) & PROPS
    res << make_qs("prop", props)

    if opts[:revids]
      res << make_qs("revids", opts[:revids])
    end

    if opts[:rvprop]
      res << make_qs("rvprop", opts[:rvprop] & RVPROPS)
    end

    res
  end

  # "name=a|b|c" with the pipe-joined collection URL-escaped.
  def make_qs(name, collection)
    "#{name}=#{CGI.escape(collection.join('|'))}"
  end

end
|
180
|
+
|
181
|
+
|
data/lib/wikipedia.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'mediawiki'
|
2
|
+
|
3
|
+
# The Wikipedia constant allows the use of Wikipedia's Query API from Ruby
|
4
|
+
# The wrapping is incomplete and the interface will be cleaned up as work is
|
5
|
+
# done.
|
6
|
+
#
|
7
|
+
# == Usage
|
8
|
+
#
|
9
|
+
# The simplest case is just finding pages by title. The Wikipedia API allows
|
10
|
+
# requests to be on multiple titles or ids, so this wrapping returns an array of
|
11
|
+
# pages
|
12
|
+
#
|
13
|
+
# require 'wikipedia'
|
14
|
+
# page = Wikipedia.find_by_titles('Foo').pages.first
|
15
|
+
# page.title #=> "Foo"
|
16
|
+
#
|
17
|
+
# Pages can also be found based on pageid
|
18
|
+
#
|
19
|
+
# page = Wikipedia.find_by_pageids(10).pages.first
|
20
|
+
# page.title #=> "AccessibleComputing"
|
21
|
+
#
|
22
|
+
# Further API options can be specified in the optional second parameter to
|
23
|
+
# find_by_*. This can be used to limit the fetching of unnecessary data
|
24
|
+
#
|
25
|
+
# page = Wikipedia.find_by_titles('Foo', :prop => [:langlinks]).pages.first
|
26
|
+
# page.langlinks #=> ["da", "fi", "it", "no", "sl", "vi"]
|
27
|
+
#
|
28
|
+
# Ready-made client for English Wikipedia.
# HTTPS is required: api.php redirects plain-HTTP requests to HTTPS, and
# Ruby's open-uri refuses cross-scheme (http -> https) redirects, so the
# old http:// endpoint raised on every request.
Wikipedia = MediaWiki.new("https://en.wikipedia.org/w/api.php")
|
data/test/sample.xml
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<api>
|
3
|
+
<query>
|
4
|
+
<pages>
|
5
|
+
<page lastrevid="208372148" touched="2008-04-29T06:15:08Z" title="Foo" ns="0" length="3945" counter="0" pageid="9132808">
|
6
|
+
<revisions>
|
7
|
+
<rev timestamp="2008-04-26T19:07:10Z" user="Andreas Kaufmann" revid="208372148" comment="Removed category "Computer programming"; Quick-adding category "Variable (computer programming)" (using [[WP:HOTCAT|HotCat]])" minor=""/>
|
8
|
+
</revisions>
|
9
|
+
<links>
|
10
|
+
<pl title="2007" ns="0"/>
|
11
|
+
<pl title="Abstract entity" ns="0"/>
|
12
|
+
<pl title="Abstraction (computer science)" ns="0"/>
|
13
|
+
<pl title="Algebra" ns="0"/>
|
14
|
+
<pl title="Apt:foo" ns="0"/>
|
15
|
+
<pl title="Backronym" ns="0"/>
|
16
|
+
<pl title="Bar (computer science)" ns="0"/>
|
17
|
+
<pl title="Bill Gates" ns="0"/>
|
18
|
+
<pl title="Canonical" ns="0"/>
|
19
|
+
<pl title="Commands" ns="0"/>
|
20
|
+
<pl title="Computer programming" ns="0"/>
|
21
|
+
<pl title="Computer science" ns="0"/>
|
22
|
+
<pl title="Cruft" ns="0"/>
|
23
|
+
<pl title="Data" ns="0"/>
|
24
|
+
<pl title="Eric S. Raymond" ns="0"/>
|
25
|
+
<pl title="FUBAR" ns="0"/>
|
26
|
+
<pl title="Foo Camp" ns="0"/>
|
27
|
+
<pl title="Foo Fighter" ns="0"/>
|
28
|
+
<pl title="Foo Fighters" ns="0"/>
|
29
|
+
<pl title="Foobar" ns="0"/>
|
30
|
+
<pl title="Functions" ns="0"/>
|
31
|
+
<pl title="Hacker convention" ns="0"/>
|
32
|
+
<pl title="Internet Engineering Task Force" ns="0"/>
|
33
|
+
<pl title="Kludge" ns="0"/>
|
34
|
+
<pl title="List of Daffy Duck cartoons" ns="0"/>
|
35
|
+
<pl title="MIT Press" ns="0"/>
|
36
|
+
<pl title="Metasyntactic variable" ns="0"/>
|
37
|
+
<pl title="Neologism" ns="0"/>
|
38
|
+
<pl title="November 5" ns="0"/>
|
39
|
+
<pl title="O'Reilly Media" ns="0"/>
|
40
|
+
<pl title="Perl" ns="0"/>
|
41
|
+
<pl title="Pogo (comics)" ns="0"/>
|
42
|
+
<pl title="Pseudocode" ns="0"/>
|
43
|
+
<pl title="Request for Comments" ns="0"/>
|
44
|
+
<pl title="Smokey Stover" ns="0"/>
|
45
|
+
<pl title="Subroutine" ns="0"/>
|
46
|
+
<pl title="United States v. Microsoft" ns="0"/>
|
47
|
+
<pl title="Variable" ns="0"/>
|
48
|
+
<pl title="Variables" ns="0"/>
|
49
|
+
<pl title="Web Services Interoperability" ns="0"/>
|
50
|
+
<pl title="Wiktionary" ns="0"/>
|
51
|
+
</links>
|
52
|
+
<langlinks>
|
53
|
+
<ll lang="da">Foo (data)</ll>
|
54
|
+
<ll lang="fi">Foo</ll>
|
55
|
+
<ll lang="it">Foo</ll>
|
56
|
+
<ll lang="no">Foo</ll>
|
57
|
+
<ll lang="sl">Foo</ll>
|
58
|
+
<ll lang="vi">Foo</ll>
|
59
|
+
</langlinks>
|
60
|
+
<images>
|
61
|
+
<im title="Image:Wiktionary-logo-en.svg" ns="6"/>
|
62
|
+
</images>
|
63
|
+
<templates>
|
64
|
+
<tl title="Template:Cite book" ns="10"/>
|
65
|
+
<tl title="Template:Cite web" ns="10"/>
|
66
|
+
<tl title="Template:De icon" ns="10"/>
|
67
|
+
<tl title="Template:Languageicon" ns="10"/>
|
68
|
+
<tl title="Template:Reflist" ns="10"/>
|
69
|
+
<tl title="Template:Wiktionary" ns="10"/>
|
70
|
+
</templates>
|
71
|
+
<categories>
|
72
|
+
<cl title="Category:Placeholder names" ns="14"/>
|
73
|
+
<cl title="Category:Variable (computer programming)" ns="14"/>
|
74
|
+
</categories>
|
75
|
+
<extlinks>
|
76
|
+
<el>http://books.google.com/books?id=POlUJW3Z9McC&pg=PA5&dq=foo+jargon&ei=GnIvR8PwGJiSpgK1qIT6CQ&ie=ISO-8859-1&sig=hIE0I8TtPGKUbSU-wgDTm4hQ8ig#PPA4,M1</el>
|
77
|
+
<el>http://foo-magazin.de/</el>
|
78
|
+
<el>http://tools.ietf.org/html/rfc3092</el>
|
79
|
+
<el>http://www.ietf.org/rfc/rfc3092.txt</el>
|
80
|
+
<el>http://www.news.com/Microsoft-ploy-to-block-Sun-exposed/2100-1001_3-912906.html</el>
|
81
|
+
</extlinks>
|
82
|
+
</page>
|
83
|
+
</pages>
|
84
|
+
</query>
|
85
|
+
</api>
|
data/test/test.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'rubygems'
|
3
|
+
require 'shoulda'
|
4
|
+
require File.dirname(__FILE__) + '/../lib/mediawiki'
|
5
|
+
|
6
|
+
# Test double: reopen MediaWikiBase so that every "fetch" serves the local
# sample.xml fixture instead of touching the network. The URL argument is
# accepted for interface compatibility but ignored.
class MediaWiki
  class MediaWikiBase
    def get_xml(_url)
      fixture = File.expand_path('sample.xml', File.dirname(__FILE__))
      Hpricot.XML(open(fixture))
    end
  end
end
|
13
|
+
|
14
|
+
# Exercises the MediaWiki client against the canned test/sample.xml fixture
# (network access is stubbed out by the MediaWikiBase patch earlier in this
# file, so the URL below is never actually fetched).
class MediaWikiTest < Test::Unit::TestCase
  def setup
    # Placeholder endpoint; get_xml is stubbed to read the local fixture.
    @mw = MediaWiki.new("http://mock.com/api.php")
  end

  context "MediaWiki interface" do
    # NOTE(review): these assert only that a truthy result object comes
    # back, not that the correct page was found.
    should("find article by id"){ assert @mw.find(10) }
    should("find article by title"){ assert @mw.find_by_title("Foo") }
    should("find articles by ids"){ assert @mw.find_by_pageids(10,11) }
    should("find articles by titles"){ assert @mw.find_by_titles("Foo","Bar") }
  end

  context "MediaWiki base" do
    should("have xml"){ assert @mw.find_by_titles("Foo").xml }
    should("have pages"){ assert @mw.find_by_titles("Foo").pages }
  end

  # TODO: no assertions yet for parsed Page attributes (title, links,
  # langlinks, revisions, ...).
  context "MediaWiki pages" do
  end
end
|
34
|
+
|
metadata
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wikipedia-api-fork
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 25
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 1
|
10
|
+
version: 0.1.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Ben Hughes
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2008-05-06 00:00:00 -04:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description:
|
23
|
+
email: ben@pixelmachine.org
|
24
|
+
executables: []
|
25
|
+
|
26
|
+
extensions: []
|
27
|
+
|
28
|
+
extra_rdoc_files: []
|
29
|
+
|
30
|
+
files:
|
31
|
+
- README
|
32
|
+
- LICENSE
|
33
|
+
- lib/mediawiki.rb
|
34
|
+
- lib/wikipedia.rb
|
35
|
+
- test/sample.xml
|
36
|
+
- test/test.rb
|
37
|
+
has_rdoc: true
|
38
|
+
homepage: http://github.com/schleyfox/wikipedia-api/
|
39
|
+
licenses: []
|
40
|
+
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options: []
|
43
|
+
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
hash: 3
|
52
|
+
segments:
|
53
|
+
- 0
|
54
|
+
version: "0"
|
55
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
hash: 3
|
61
|
+
segments:
|
62
|
+
- 0
|
63
|
+
version: "0"
|
64
|
+
requirements: []
|
65
|
+
|
66
|
+
rubyforge_project:
|
67
|
+
rubygems_version: 1.6.2
|
68
|
+
signing_key:
|
69
|
+
specification_version: 3
|
70
|
+
summary: Wikipedia-API is a ruby wrapper for the MediaWiki API
|
71
|
+
test_files: []
|
72
|
+
|