wikipedia-api-fork 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2008 Ben Hughes
2
+
3
+ Permission is hereby granted, free of charge, to any person
4
+ obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to deal in the Software without
6
+ restriction, including without limitation the rights to use,
7
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the
9
+ Software is furnished to do so, subject to the following
10
+ conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1 @@
1
+ Utility for interacting with the MediaWiki API
data/lib/mediawiki.rb ADDED
@@ -0,0 +1,181 @@
1
+ # This file contains magical incantations to interface with the MediaWiki
2
+ # API. This is very much a work in progress so don't count on it not changing
3
+ # (for the better).
4
+ #
5
+ # The MediaWiki class wraps all the functionality for general MediaWiki usage.
6
+ # You can also require wikipedia.rb to get the
7
+ # Wikipedia[link:/files/lib/wikipedia_rb.html] constant that wraps
8
+ # up the basic functionality.
9
+ #
10
+ # == Installation
11
+ # sudo gem install schleyfox-wikipedia-api --source=http://gems.github.com
12
+ #
13
+ # == Basic Usage
14
+ #
15
+ # For example, to get a page from Wikiquote
16
+ #
17
+ # require 'mediawiki'
18
+ # w = MediaWiki.new('http://en.wikiquote.org/w/api.php')
19
+ # w.find_by_title('Oscar Wilde')
20
+ #
21
+ # If you want Wikipedia, you can use the built in constant
22
+ #
23
+ # require 'wikipedia'
24
+ # Wikipedia.find_by_title('Oscar Wilde')
25
+ #
26
+ # == Source
27
+ #
28
+ # Check out the source on github http://github.com/schleyfox/wikipedia-api
29
+
30
+ ['hpricot', 'cgi', 'open-uri'].each {|f| require f}
31
+
32
+
33
+ # The MediaWiki class allows one to interface with the MediaWiki API.
34
+ # Everything about it is incomplete and I promise that it will eat your kids
35
+ # and/or small furry woodland creatures. These things happen.
36
+ #
37
+ # == Usage
38
+ #
39
+ # To use, you construct a MediaWiki object for the site
40
+ #
41
+ # require 'mediawiki'
42
+ # example_wiki = MediaWiki.new("http://example.com/w/api.php")
43
+ #
44
+ # From here you can query based on title or pageid for individual pages or
45
+ # collections
46
+ #
47
+ # # By pageid
48
+ # page = example_wiki.find(10)
49
+ # page.title #=> "foo"
50
+ #
51
+ # # By title
52
+ # page = example_wiki.find_by_title("foo")
53
+ # page.pageid #=> 10
54
+ #
55
+ # # a collection by pageids
56
+ # result = example_wiki.find_by_pageids(10,11)
57
+ # result.pages.collect(&:title) #=> ["foo", "bar"]
58
+ #
59
+ # # a collection by titles
60
+ # result = example_wiki.find_by_titles("foo", "bar")
61
+ # result.pages.collect(&:pageid) #=> [10, 11]
62
+ #
63
+ class MediaWiki
64
+ PROPS = [:info, :revisions, :links, :langlinks, :images, :imageinfo,
65
+ :templates, :categories, :extlinks, :categoryinfo]
66
+ RVPROPS = [:ids, :flags, :timestamp, :user, :size, :comment, :content]
67
+
68
+
69
+ def initialize(url)
70
+ @url = url
71
+ end
72
+
73
+ # find by pageid
74
+ def find(*opts)
75
+ find_by_pageids(opts).pages.first
76
+ end
77
+
78
+ # find the articles identified by the Array page_ids
79
+ def find_by_pageids(*opts)
80
+ page_ids, opts_qs = handle_options(opts)
81
+ page_ids_qs = make_qs("pageids", page_ids)
82
+ MediaWikiBase.new(make_url(opts_qs.push(page_ids_qs)))
83
+ end
84
+
85
+ # Same as find_by_titles but returns a single page
86
+ def find_by_title(*opts)
87
+ find_by_titles(opts).pages.first
88
+ end
89
+
90
+ # find the articles identified by the Array titles
91
+ def find_by_titles(*opts)
92
+ titles, opts_qs = handle_options(opts)
93
+ titles_qs = make_qs("titles", titles)
94
+ MediaWikiBase.new(make_url(opts_qs.push(titles_qs)))
95
+ end
96
+
97
+ class MediaWikiBase
98
+
99
+ attr_accessor :xml, :pages
100
+
101
+ def initialize(url)
102
+ @xml = get_xml(url)
103
+ @pages = (@xml/:api/:query/:pages/:page).collect{|p| Page.new(p) }
104
+ end
105
+
106
+
107
+ # Page encapsulates the properties of a Wikipedia page.
108
+ class Page
109
+ attr_accessor *PROPS
110
+ attr_accessor :title, :pageid
111
+
112
+ def initialize(page)
113
+ @title = page.attributes['title']
114
+ @pageid = page.attributes['pageid']
115
+ @links = (page/:links/:pl).collect{|pl| pl.attributes['title']}
116
+ @langlinks = (page/:langlinks/:ll).collect{|ll| ll.attributes['lang']}
117
+ @images = (page/:images/:im).collect{|im| im.attributes['title']}
118
+ @templates = (page/:templates/:tl).collect{|tl| tl.attributes['title']}
119
+ @extlinks = (page/:extlinks/:el).collect{|el| el.inner_html}
120
+ @revisions = (page/:revisions/:rev).collect{|rev| Revision.new(rev)}
121
+ end
122
+ end
123
+
124
+ class Revision
125
+ attr_accessor *RVPROPS
126
+ attr_accessor :revid
127
+
128
+ def initialize(rev)
129
+ @revid = rev.attributes['revid']
130
+ @user = rev.attributes['user']
131
+ @timestamp = Time.parse(rev.attributes['timestamp'])
132
+ @comment = rev.attributes['comment']
133
+ @content = rev.inner_html
134
+ end
135
+ end
136
+
137
+ protected
138
+ def get_xml(url)
139
+ Hpricot.XML(open(url))
140
+ end
141
+ end
142
+
143
+
144
+ protected
145
+ def make_url(*opts)
146
+ @url + "?" + (["action=query", "format=xml"] + opts).join('&')
147
+ end
148
+
149
+ def handle_options(opts)
150
+ arr = opts.delete_if{|o| o.is_a? Hash}
151
+ hash = (opts - arr).first
152
+ [arr, handle_opts_hash(hash)]
153
+ end
154
+
155
+ def handle_opts_hash(opts)
156
+ opts ||= {}
157
+ res = []
158
+
159
+ opts[:prop] ||= PROPS
160
+ opts[:prop] = opts[:prop] & PROPS
161
+ res << make_qs("prop", opts[:prop])
162
+
163
+ if opts[:revids]
164
+ res << make_qs("revids", opts[:revids])
165
+ end
166
+
167
+ if opts[:rvprop]
168
+ opts[:rvprop] = opts[:rvprop] & RVPROPS
169
+ res << make_qs("rvprop", opts[:rvprop])
170
+ end
171
+
172
+ res
173
+ end
174
+
175
+ def make_qs(name, collection)
176
+ "#{name}=#{CGI.escape(collection.join('|'))}"
177
+ end
178
+
179
+ end
180
+
181
+
data/lib/wikipedia.rb ADDED
@@ -0,0 +1,28 @@
1
+ require 'mediawiki'
2
+
3
+ # The Wikipedia constant allows the use of Wikipedia's Query API from Ruby
4
+ # The wrapping is incomplete and the interface will be cleaned up as work is
5
+ # done.
6
+ #
7
+ # == Usage
8
+ #
9
+ # The simplest case is just finding pages by title. The Wikipedia API allows
10
+ # requests to be made on multiple titles or ids, so this wrapping returns an array of
11
+ # pages
12
+ #
13
+ # require 'wikipedia'
14
+ # page = Wikipedia.find_by_titles('Foo').pages.first
15
+ # page.title #=> "Foo"
16
+ #
17
+ # Pages can also be found based on pageid
18
+ #
19
+ # page = Wikipedia.find_by_pageids(10).pages.first
20
+ # page.title #=> "AccessibleComputing"
21
+ #
22
+ # Further API options can be specified in the optional second parameter to
23
+ # find_by_*. This can be used to limit the fetching of unnecessary data
24
+ #
25
+ # page = Wikipedia.find_by_titles('Foo', :prop => [:langlinks]).pages.first
26
+ # page.langlinks #=> ["da", "fi", "it", "no", "sl", "vi"]
27
+ #
28
+ Wikipedia = MediaWiki.new("http://en.wikipedia.org/w/api.php")
data/test/sample.xml ADDED
@@ -0,0 +1,85 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <api>
3
+ <query>
4
+ <pages>
5
+ <page lastrevid="208372148" touched="2008-04-29T06:15:08Z" title="Foo" ns="0" length="3945" counter="0" pageid="9132808">
6
+ <revisions>
7
+ <rev timestamp="2008-04-26T19:07:10Z" user="Andreas Kaufmann" revid="208372148" comment="Removed category &quot;Computer programming&quot;; Quick-adding category &quot;Variable (computer programming)&quot; (using [[WP:HOTCAT|HotCat]])" minor=""/>
8
+ </revisions>
9
+ <links>
10
+ <pl title="2007" ns="0"/>
11
+ <pl title="Abstract entity" ns="0"/>
12
+ <pl title="Abstraction (computer science)" ns="0"/>
13
+ <pl title="Algebra" ns="0"/>
14
+ <pl title="Apt:foo" ns="0"/>
15
+ <pl title="Backronym" ns="0"/>
16
+ <pl title="Bar (computer science)" ns="0"/>
17
+ <pl title="Bill Gates" ns="0"/>
18
+ <pl title="Canonical" ns="0"/>
19
+ <pl title="Commands" ns="0"/>
20
+ <pl title="Computer programming" ns="0"/>
21
+ <pl title="Computer science" ns="0"/>
22
+ <pl title="Cruft" ns="0"/>
23
+ <pl title="Data" ns="0"/>
24
+ <pl title="Eric S. Raymond" ns="0"/>
25
+ <pl title="FUBAR" ns="0"/>
26
+ <pl title="Foo Camp" ns="0"/>
27
+ <pl title="Foo Fighter" ns="0"/>
28
+ <pl title="Foo Fighters" ns="0"/>
29
+ <pl title="Foobar" ns="0"/>
30
+ <pl title="Functions" ns="0"/>
31
+ <pl title="Hacker convention" ns="0"/>
32
+ <pl title="Internet Engineering Task Force" ns="0"/>
33
+ <pl title="Kludge" ns="0"/>
34
+ <pl title="List of Daffy Duck cartoons" ns="0"/>
35
+ <pl title="MIT Press" ns="0"/>
36
+ <pl title="Metasyntactic variable" ns="0"/>
37
+ <pl title="Neologism" ns="0"/>
38
+ <pl title="November 5" ns="0"/>
39
+ <pl title="O'Reilly Media" ns="0"/>
40
+ <pl title="Perl" ns="0"/>
41
+ <pl title="Pogo (comics)" ns="0"/>
42
+ <pl title="Pseudocode" ns="0"/>
43
+ <pl title="Request for Comments" ns="0"/>
44
+ <pl title="Smokey Stover" ns="0"/>
45
+ <pl title="Subroutine" ns="0"/>
46
+ <pl title="United States v. Microsoft" ns="0"/>
47
+ <pl title="Variable" ns="0"/>
48
+ <pl title="Variables" ns="0"/>
49
+ <pl title="Web Services Interoperability" ns="0"/>
50
+ <pl title="Wiktionary" ns="0"/>
51
+ </links>
52
+ <langlinks>
53
+ <ll lang="da">Foo (data)</ll>
54
+ <ll lang="fi">Foo</ll>
55
+ <ll lang="it">Foo</ll>
56
+ <ll lang="no">Foo</ll>
57
+ <ll lang="sl">Foo</ll>
58
+ <ll lang="vi">Foo</ll>
59
+ </langlinks>
60
+ <images>
61
+ <im title="Image:Wiktionary-logo-en.svg" ns="6"/>
62
+ </images>
63
+ <templates>
64
+ <tl title="Template:Cite book" ns="10"/>
65
+ <tl title="Template:Cite web" ns="10"/>
66
+ <tl title="Template:De icon" ns="10"/>
67
+ <tl title="Template:Languageicon" ns="10"/>
68
+ <tl title="Template:Reflist" ns="10"/>
69
+ <tl title="Template:Wiktionary" ns="10"/>
70
+ </templates>
71
+ <categories>
72
+ <cl title="Category:Placeholder names" ns="14"/>
73
+ <cl title="Category:Variable (computer programming)" ns="14"/>
74
+ </categories>
75
+ <extlinks>
76
+ <el>http://books.google.com/books?id=POlUJW3Z9McC&amp;pg=PA5&amp;dq=foo+jargon&amp;ei=GnIvR8PwGJiSpgK1qIT6CQ&amp;ie=ISO-8859-1&amp;sig=hIE0I8TtPGKUbSU-wgDTm4hQ8ig#PPA4,M1</el>
77
+ <el>http://foo-magazin.de/</el>
78
+ <el>http://tools.ietf.org/html/rfc3092</el>
79
+ <el>http://www.ietf.org/rfc/rfc3092.txt</el>
80
+ <el>http://www.news.com/Microsoft-ploy-to-block-Sun-exposed/2100-1001_3-912906.html</el>
81
+ </extlinks>
82
+ </page>
83
+ </pages>
84
+ </query>
85
+ </api>
data/test/test.rb ADDED
@@ -0,0 +1,34 @@
1
+ require 'test/unit'
2
+ require 'rubygems'
3
+ require 'shoulda'
4
+ require File.dirname(__FILE__) + '/../lib/mediawiki'
5
+
6
+ class MediaWiki
7
+ class MediaWikiBase
8
+ def get_xml(url)
9
+ Hpricot.XML(open(File.dirname(__FILE__) + '/sample.xml'))
10
+ end
11
+ end
12
+ end
13
+
14
+ class MediaWikiTest < Test::Unit::TestCase
15
+ def setup
16
+ @mw = MediaWiki.new("http://mock.com/api.php")
17
+ end
18
+
19
+ context "MediaWiki interface" do
20
+ should("find article by id"){ assert @mw.find(10) }
21
+ should("find article by title"){ assert @mw.find_by_title("Foo") }
22
+ should("find articles by ids"){ assert @mw.find_by_pageids(10,11) }
23
+ should("find articles by titles"){ assert @mw.find_by_titles("Foo","Bar") }
24
+ end
25
+
26
+ context "MediaWiki base" do
27
+ should("have xml"){ assert @mw.find_by_titles("Foo").xml }
28
+ should("have pages"){ assert @mw.find_by_titles("Foo").pages }
29
+ end
30
+
31
+ context "MediaWiki pages" do
32
+ end
33
+ end
34
+
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wikipedia-api-fork
3
+ version: !ruby/object:Gem::Version
4
+ hash: 25
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 1
10
+ version: 0.1.1
11
+ platform: ruby
12
+ authors:
13
+ - Ben Hughes
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2008-05-06 00:00:00 -04:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description:
23
+ email: ben@pixelmachine.org
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files: []
29
+
30
+ files:
31
+ - README
32
+ - LICENSE
33
+ - lib/mediawiki.rb
34
+ - lib/wikipedia.rb
35
+ - test/sample.xml
36
+ - test/test.rb
37
+ has_rdoc: true
38
+ homepage: http://github.com/schleyfox/wikipedia-api/
39
+ licenses: []
40
+
41
+ post_install_message:
42
+ rdoc_options: []
43
+
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ hash: 3
52
+ segments:
53
+ - 0
54
+ version: "0"
55
+ required_rubygems_version: !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ hash: 3
61
+ segments:
62
+ - 0
63
+ version: "0"
64
+ requirements: []
65
+
66
+ rubyforge_project:
67
+ rubygems_version: 1.6.2
68
+ signing_key:
69
+ specification_version: 3
70
+ summary: Wikipedia-API is a ruby wrapper for the MediaWiki API
71
+ test_files: []
72
+