wikipedia-api-fork 0.1.1

data/LICENSE ADDED
@@ -0,0 +1,22 @@
+ Copyright (c) 2008 Ben Hughes
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use,
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following
+ conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1 @@
+ Utility for interacting with the MediaWiki API
data/lib/mediawiki.rb ADDED
@@ -0,0 +1,181 @@
+ # This file contains magical incantations to interface with the MediaWiki
+ # API. This is very much a work in progress, so expect the interface to keep
+ # changing (for the better).
+ #
+ # The MediaWiki class wraps all the functionality for general MediaWiki usage.
+ # You can also require wikipedia.rb to get the
+ # Wikipedia[link:/files/lib/wikipedia_rb.html] constant that wraps
+ # up the basic functionality.
+ #
+ # == Installation
+ #   sudo gem install schleyfox-wikipedia-api --source=http://gems.github.com
+ #
+ # == Basic Usage
+ #
+ # For example, to get a page from Wikiquote
+ #
+ #   require 'mediawiki'
+ #   w = MediaWiki.new('http://en.wikiquote.org/w/api.php')
+ #   w.find_by_title('Oscar Wilde')
+ #
+ # If you want Wikipedia, you can use the built-in constant
+ #
+ #   require 'wikipedia'
+ #   Wikipedia.find_by_title('Oscar Wilde')
+ #
+ # == Source
+ #
+ # Check out the source on github: http://github.com/schleyfox/wikipedia-api
+
+ # 'time' is required for Time.parse, used when building Revision objects
+ ['hpricot', 'cgi', 'open-uri', 'time'].each {|f| require f}
+
+
+ # The MediaWiki class allows one to interface with the MediaWiki API.
+ # Everything about it is incomplete and I promise that it will eat your kids
+ # and/or small furry woodland creatures. These things happen.
+ #
+ # == Usage
+ #
+ # To use, you construct a MediaWiki object for the site
+ #
+ #   require 'mediawiki'
+ #   example_wiki = MediaWiki.new("http://example.com/w/api.php")
+ #
+ # From here you can query based on title or pageid for individual pages or
+ # collections.
+ #
+ #   # By pageid
+ #   page = example_wiki.find(10)
+ #   page.title #=> "foo"
+ #
+ #   # By title
+ #   page = example_wiki.find_by_title("foo")
+ #   page.pageid #=> 10
+ #
+ #   # A collection by pageids
+ #   result = example_wiki.find_by_pageids(10, 11)
+ #   result.pages.collect(&:title) #=> ["foo", "bar"]
+ #
+ #   # A collection by titles
+ #   result = example_wiki.find_by_titles("foo", "bar")
+ #   result.pages.collect(&:pageid) #=> [10, 11]
+ #
+ class MediaWiki
+   PROPS = [:info, :revisions, :links, :langlinks, :images, :imageinfo,
+     :templates, :categories, :extlinks, :categoryinfo]
+   RVPROPS = [:ids, :flags, :timestamp, :user, :size, :comment, :content]
+
+
+   def initialize(url)
+     @url = url
+   end
+
+   # Same as find_by_pageids but returns a single page
+   def find(*opts)
+     find_by_pageids(*opts).pages.first
+   end
+
+   # Find the articles identified by the given pageids; an options Hash may be
+   # passed as the last argument
+   def find_by_pageids(*opts)
+     page_ids, opts_qs = handle_options(opts)
+     page_ids_qs = make_qs("pageids", page_ids)
+     MediaWikiBase.new(make_url(opts_qs.push(page_ids_qs)))
+   end
+
+   # Same as find_by_titles but returns a single page
+   def find_by_title(*opts)
+     find_by_titles(*opts).pages.first
+   end
+
+   # Find the articles identified by the given titles; an options Hash may be
+   # passed as the last argument
+   def find_by_titles(*opts)
+     titles, opts_qs = handle_options(opts)
+     titles_qs = make_qs("titles", titles)
+     MediaWikiBase.new(make_url(opts_qs.push(titles_qs)))
+   end
+
+   # MediaWikiBase fetches the XML for a query URL and parses it into Page
+   # objects.
+   class MediaWikiBase
+
+     attr_accessor :xml, :pages
+
+     def initialize(url)
+       @xml = get_xml(url)
+       @pages = (@xml/:api/:query/:pages/:page).collect{|p| Page.new(p) }
+     end
+
+
+     # Page encapsulates the properties of a wiki page.
+     class Page
+       attr_accessor *PROPS
+       attr_accessor :title, :pageid
+
+       def initialize(page)
+         @title = page.attributes['title']
+         @pageid = page.attributes['pageid']
+         @links = (page/:links/:pl).collect{|pl| pl.attributes['title']}
+         @langlinks = (page/:langlinks/:ll).collect{|ll| ll.attributes['lang']}
+         @images = (page/:images/:im).collect{|im| im.attributes['title']}
+         @templates = (page/:templates/:tl).collect{|tl| tl.attributes['title']}
+         @categories = (page/:categories/:cl).collect{|cl| cl.attributes['title']}
+         @extlinks = (page/:extlinks/:el).collect{|el| el.inner_html}
+         @revisions = (page/:revisions/:rev).collect{|rev| Revision.new(rev)}
+       end
+     end
+
+     # Revision encapsulates a single revision of a page.
+     class Revision
+       attr_accessor *RVPROPS
+       attr_accessor :revid
+
+       def initialize(rev)
+         @revid = rev.attributes['revid']
+         @user = rev.attributes['user']
+         @timestamp = Time.parse(rev.attributes['timestamp'])
+         @comment = rev.attributes['comment']
+         @content = rev.inner_html
+       end
+     end
+
+     protected
+
+     def get_xml(url)
+       Hpricot.XML(open(url))
+     end
+   end
+
+
+   protected
+
+   # Build the full API URL from the query-string fragments
+   def make_url(*opts)
+     @url + "?" + (["action=query", "format=xml"] + opts).join('&')
+   end
+
+   # Split the argument list into [ids_or_titles, option_query_strings];
+   # an options Hash may appear anywhere in opts
+   def handle_options(opts)
+     hashes, arr = opts.partition{|o| o.is_a? Hash}
+     [arr, handle_opts_hash(hashes.first)]
+   end
+
+   # Turn the options Hash into an array of query-string fragments, keeping
+   # only recognised prop/rvprop values
+   def handle_opts_hash(opts)
+     opts ||= {}
+     res = []
+
+     opts[:prop] ||= PROPS
+     opts[:prop] = opts[:prop] & PROPS
+     res << make_qs("prop", opts[:prop])
+
+     if opts[:revids]
+       res << make_qs("revids", opts[:revids])
+     end
+
+     if opts[:rvprop]
+       opts[:rvprop] = opts[:rvprop] & RVPROPS
+       res << make_qs("rvprop", opts[:rvprop])
+     end
+
+     res
+   end
+
+   # name=a|b|c with the value CGI-escaped
+   def make_qs(name, collection)
+     "#{name}=#{CGI.escape(collection.join('|'))}"
+   end
+
+ end
+
+
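A rough sketch of how a call flows through the query helpers above, assuming the option-Hash handling shown in lib/mediawiki.rb; the URL in the comment is approximate, since parameter order and escaping are implementation details of make_url and make_qs:

    require 'mediawiki'

    wiki = MediaWiki.new('http://en.wikipedia.org/w/api.php')

    # make_url always prepends action=query and format=xml; make_qs pipe-joins
    # and CGI-escapes each collection, so this call requests something like
    #   http://en.wikipedia.org/w/api.php?action=query&format=xml&prop=langlinks&titles=Foo
    page = wiki.find_by_title('Foo', :prop => [:langlinks])
    page.langlinks #=> language codes reported by the live API, e.g. ["da", "fi", ...]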
data/lib/wikipedia.rb ADDED
@@ -0,0 +1,28 @@
+ require 'mediawiki'
+
+ # The Wikipedia constant allows the use of Wikipedia's Query API from Ruby.
+ # The wrapping is incomplete and the interface will be cleaned up as work is
+ # done.
+ #
+ # == Usage
+ #
+ # The simplest case is just finding pages by title. The Wikipedia API allows
+ # requests for multiple titles or ids, so this wrapping returns an array of
+ # pages.
+ #
+ #   require 'wikipedia'
+ #   page = Wikipedia.find_by_titles('Foo').pages.first
+ #   page.title #=> "Foo"
+ #
+ # Pages can also be found by pageid
+ #
+ #   page = Wikipedia.find_by_pageids(10).pages.first
+ #   page.title #=> "AccessibleComputing"
+ #
+ # Further API options can be specified in an optional trailing Hash passed to
+ # find_by_*. This can be used to limit the fetching of unnecessary data.
+ #
+ #   page = Wikipedia.find_by_titles('Foo', :prop => [:langlinks]).pages.first
+ #   page.langlinks #=> ["da", "fi", "it", "no", "sl", "vi"]
+ #
+ Wikipedia = MediaWiki.new("http://en.wikipedia.org/w/api.php")
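The same wrapper works against any MediaWiki install, and revision data can be requested through the :prop and :rvprop options. A sketch only, assuming the option handling shown in lib/mediawiki.rb above; the fields actually returned depend on the target wiki:

    require 'mediawiki'

    # Any MediaWiki API endpoint can be wrapped, not just Wikipedia
    wikiquote = MediaWiki.new('http://en.wikiquote.org/w/api.php')

    page = wikiquote.find_by_title('Oscar Wilde',
                                   :prop   => [:info, :revisions],
                                   :rvprop => [:user, :timestamp, :comment])
    page.revisions.each do |rev|
      puts "#{rev.user} edited at #{rev.timestamp}: #{rev.comment}"
    end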
data/test/sample.xml ADDED
@@ -0,0 +1,85 @@
+ <?xml version="1.0" encoding="utf-8"?>
+ <api>
+   <query>
+     <pages>
+       <page lastrevid="208372148" touched="2008-04-29T06:15:08Z" title="Foo" ns="0" length="3945" counter="0" pageid="9132808">
+         <revisions>
+           <rev timestamp="2008-04-26T19:07:10Z" user="Andreas Kaufmann" revid="208372148" comment="Removed category &quot;Computer programming&quot;; Quick-adding category &quot;Variable (computer programming)&quot; (using [[WP:HOTCAT|HotCat]])" minor=""/>
+         </revisions>
+         <links>
+           <pl title="2007" ns="0"/>
+           <pl title="Abstract entity" ns="0"/>
+           <pl title="Abstraction (computer science)" ns="0"/>
+           <pl title="Algebra" ns="0"/>
+           <pl title="Apt:foo" ns="0"/>
+           <pl title="Backronym" ns="0"/>
+           <pl title="Bar (computer science)" ns="0"/>
+           <pl title="Bill Gates" ns="0"/>
+           <pl title="Canonical" ns="0"/>
+           <pl title="Commands" ns="0"/>
+           <pl title="Computer programming" ns="0"/>
+           <pl title="Computer science" ns="0"/>
+           <pl title="Cruft" ns="0"/>
+           <pl title="Data" ns="0"/>
+           <pl title="Eric S. Raymond" ns="0"/>
+           <pl title="FUBAR" ns="0"/>
+           <pl title="Foo Camp" ns="0"/>
+           <pl title="Foo Fighter" ns="0"/>
+           <pl title="Foo Fighters" ns="0"/>
+           <pl title="Foobar" ns="0"/>
+           <pl title="Functions" ns="0"/>
+           <pl title="Hacker convention" ns="0"/>
+           <pl title="Internet Engineering Task Force" ns="0"/>
+           <pl title="Kludge" ns="0"/>
+           <pl title="List of Daffy Duck cartoons" ns="0"/>
+           <pl title="MIT Press" ns="0"/>
+           <pl title="Metasyntactic variable" ns="0"/>
+           <pl title="Neologism" ns="0"/>
+           <pl title="November 5" ns="0"/>
+           <pl title="O'Reilly Media" ns="0"/>
+           <pl title="Perl" ns="0"/>
+           <pl title="Pogo (comics)" ns="0"/>
+           <pl title="Pseudocode" ns="0"/>
+           <pl title="Request for Comments" ns="0"/>
+           <pl title="Smokey Stover" ns="0"/>
+           <pl title="Subroutine" ns="0"/>
+           <pl title="United States v. Microsoft" ns="0"/>
+           <pl title="Variable" ns="0"/>
+           <pl title="Variables" ns="0"/>
+           <pl title="Web Services Interoperability" ns="0"/>
+           <pl title="Wiktionary" ns="0"/>
+         </links>
+         <langlinks>
+           <ll lang="da">Foo (data)</ll>
+           <ll lang="fi">Foo</ll>
+           <ll lang="it">Foo</ll>
+           <ll lang="no">Foo</ll>
+           <ll lang="sl">Foo</ll>
+           <ll lang="vi">Foo</ll>
+         </langlinks>
+         <images>
+           <im title="Image:Wiktionary-logo-en.svg" ns="6"/>
+         </images>
+         <templates>
+           <tl title="Template:Cite book" ns="10"/>
+           <tl title="Template:Cite web" ns="10"/>
+           <tl title="Template:De icon" ns="10"/>
+           <tl title="Template:Languageicon" ns="10"/>
+           <tl title="Template:Reflist" ns="10"/>
+           <tl title="Template:Wiktionary" ns="10"/>
+         </templates>
+         <categories>
+           <cl title="Category:Placeholder names" ns="14"/>
+           <cl title="Category:Variable (computer programming)" ns="14"/>
+         </categories>
+         <extlinks>
+           <el>http://books.google.com/books?id=POlUJW3Z9McC&amp;pg=PA5&amp;dq=foo+jargon&amp;ei=GnIvR8PwGJiSpgK1qIT6CQ&amp;ie=ISO-8859-1&amp;sig=hIE0I8TtPGKUbSU-wgDTm4hQ8ig#PPA4,M1</el>
+           <el>http://foo-magazin.de/</el>
+           <el>http://tools.ietf.org/html/rfc3092</el>
+           <el>http://www.ietf.org/rfc/rfc3092.txt</el>
+           <el>http://www.news.com/Microsoft-ploy-to-block-Sun-exposed/2100-1001_3-912906.html</el>
+         </extlinks>
+       </page>
+     </pages>
+   </query>
+ </api>
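The fixture above is exactly what Page#initialize walks with Hpricot. A standalone sketch of that traversal, assuming it is run from the gem root so test/sample.xml resolves; the values in the comments are read straight from the fixture:

    require 'rubygems'
    require 'hpricot'

    xml  = Hpricot.XML(File.read('test/sample.xml'))
    page = (xml/:api/:query/:pages/:page).first

    page.attributes['title']   #=> "Foo"
    page.attributes['pageid']  #=> "9132808"

    # Same Hpricot paths as Page#initialize in lib/mediawiki.rb
    (page/:langlinks/:ll).collect {|ll| ll.attributes['lang'] }
    #=> ["da", "fi", "it", "no", "sl", "vi"]
    (page/:categories/:cl).collect {|cl| cl.attributes['title'] }
    #=> ["Category:Placeholder names", "Category:Variable (computer programming)"]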
data/test/test.rb ADDED
@@ -0,0 +1,34 @@
+ require 'test/unit'
+ require 'rubygems'
+ require 'shoulda'
+ require File.dirname(__FILE__) + '/../lib/mediawiki'
+
+ # Stub out network access: every query parses the local fixture instead of
+ # hitting a real API endpoint.
+ class MediaWiki
+   class MediaWikiBase
+     def get_xml(url)
+       Hpricot.XML(open(File.dirname(__FILE__) + '/sample.xml'))
+     end
+   end
+ end
+
+ class MediaWikiTest < Test::Unit::TestCase
+   def setup
+     @mw = MediaWiki.new("http://mock.com/api.php")
+   end
+
+   context "MediaWiki interface" do
+     should("find article by id"){ assert @mw.find(10) }
+     should("find article by title"){ assert @mw.find_by_title("Foo") }
+     should("find articles by ids"){ assert @mw.find_by_pageids(10,11) }
+     should("find articles by titles"){ assert @mw.find_by_titles("Foo","Bar") }
+   end
+
+   context "MediaWiki base" do
+     should("have xml"){ assert @mw.find_by_titles("Foo").xml }
+     should("have pages"){ assert @mw.find_by_titles("Foo").pages }
+   end
+
+   context "MediaWiki pages" do
+   end
+ end
+
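The "MediaWiki pages" context above ships empty. A sketch of the assertions it could grow, driven entirely by the values in test/sample.xml and not part of the shipped gem:

    context "MediaWiki pages" do
      setup { @page = @mw.find_by_title("Foo") }

      should("expose the title")  { assert_equal "Foo", @page.title }
      should("expose the pageid") { assert_equal "9132808", @page.pageid }
      should("collect langlinks") { assert_equal %w(da fi it no sl vi), @page.langlinks }
      should("wrap revisions")    { assert_kind_of MediaWiki::MediaWikiBase::Revision,
                                                   @page.revisions.first }
    end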
metadata ADDED
@@ -0,0 +1,72 @@
+ --- !ruby/object:Gem::Specification
+ name: wikipedia-api-fork
+ version: !ruby/object:Gem::Version
+   hash: 25
+   prerelease:
+   segments:
+   - 0
+   - 1
+   - 1
+   version: 0.1.1
+ platform: ruby
+ authors:
+ - Ben Hughes
+ autorequire:
+ bindir: bin
+ cert_chain: []
+
+ date: 2008-05-06 00:00:00 -04:00
+ default_executable:
+ dependencies: []
+
+ description:
+ email: ben@pixelmachine.org
+ executables: []
+
+ extensions: []
+
+ extra_rdoc_files: []
+
+ files:
+ - README
+ - LICENSE
+ - lib/mediawiki.rb
+ - lib/wikipedia.rb
+ - test/sample.xml
+ - test/test.rb
+ has_rdoc: true
+ homepage: http://github.com/schleyfox/wikipedia-api/
+ licenses: []
+
+ post_install_message:
+ rdoc_options: []
+
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       hash: 3
+       segments:
+       - 0
+       version: "0"
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       hash: 3
+       segments:
+       - 0
+       version: "0"
+ requirements: []
+
+ rubyforge_project:
+ rubygems_version: 1.6.2
+ signing_key:
+ specification_version: 3
+ summary: Wikipedia-API is a ruby wrapper for the MediaWiki API
+ test_files: []
+