schleyfox-wikipedia-api 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2008 Ben Hughes
2
+
3
+ Permission is hereby granted, free of charge, to any person
4
+ obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to deal in the Software without
6
+ restriction, including without limitation the rights to use,
7
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the
9
+ Software is furnished to do so, subject to the following
10
+ conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1 @@
1
+ Utility for interacting with the MediaWiki API
@@ -0,0 +1,158 @@
1
+ # This file contains magical incantations to interface with the MediaWiki
2
+ # API. This is very much a work in progress so don't count on it not changing
3
+ # (for the better).
4
+ #
5
+ # Check out the source on github http://github.com/schleyfox/wikipedia-api
6
+
7
['hpricot', 'cgi', 'open-uri', 'time'].each {|f| require f}
8
+
9
+
10
+ # The MediaWiki class allows one to interface with the MediaWiki API.
11
+ # Everything about it is incomplete and I promise that it will eat your kids
12
+ # and/or small furry woodland creatures. These things happen.
13
+ #
14
+ # == Usage
15
+ #
16
+ # To use, you construct a MediaWiki object for the site
17
+ #
18
+ # require 'mediawiki'
19
+ # example_wiki = MediaWiki.new("http://example.com/w/api.php")
20
+ #
21
+ # From here you can query based on title or pageid for individual pages or
22
+ # collections
23
+ #
24
+ # # By pageid
25
+ # page = example_wiki.find(10)
26
+ # page.title #=> "foo"
27
+ #
28
+ # # By title
29
+ # page = example_wiki.find_by_title("foo")
30
+ # page.pageid #=> 10
31
+ #
32
+ # # a collection by pageids
33
+ # result = example_wiki.find_by_pageids(10,11)
34
+ # result.pages.collect(&:title) #=> ["foo", "bar"]
35
+ #
36
+ # # a collection by titles
37
+ # result = example_wiki.find_by_titles("foo", "bar")
38
+ # result.pages.collect(&:pageid) #=> [10, 11]
39
+ #
40
+ class MediaWiki
41
+ PROPS = [:info, :revisions, :links, :langlinks, :images, :imageinfo,
42
+ :templates, :categories, :extlinks, :categoryinfo]
43
+ RVPROPS = [:ids, :flags, :timestamp, :user, :size, :comment, :content]
44
+
45
+
46
+ def initialize(url)
47
+ @url = url
48
+ end
49
+
50
+ # find by pageid
51
+ def find(*opts)
52
+ find_by_pageids(opts).pages.first
53
+ end
54
+
55
+ # find the articles identified by the Array page_ids
56
+ def find_by_pageids(*opts)
57
+ page_ids, opts_qs = handle_options(opts)
58
+ page_ids_qs = make_qs("pageids", page_ids)
59
+ MediaWikiBase.new(make_url(opts_qs.push(page_ids_qs)))
60
+ end
61
+
62
+ # Same as find_by_titles but returns a single page
63
+ def find_by_title(*opts)
64
+ find_by_titles(opts).pages.first
65
+ end
66
+
67
+ # find the articles identified by the Array titles
68
+ def find_by_titles(*opts)
69
+ titles, opts_qs = handle_options(opts)
70
+ titles_qs = make_qs("titles", titles)
71
+ MediaWikiBase.new(make_url(opts_qs.push(titles_qs)))
72
+ end
73
+
74
+ class MediaWikiBase
75
+
76
+ attr_accessor :xml, :pages
77
+
78
+ def initialize(url)
79
+ @xml = get_xml(url)
80
+ @pages = (@xml/:api/:query/:pages/:page).collect{|p| Page.new(p) }
81
+ end
82
+
83
+
84
+ # Page encapsulates the properties of wikipedia page.
85
+ class Page
86
+ attr_accessor *PROPS
87
+ attr_accessor :title, :pageid
88
+
89
+ def initialize(page)
90
+ @title = page.attributes['title']
91
+ @pageid = page.attributes['pageid']
92
+ @links = (page/:links/:pl).collect{|pl| pl.attributes['title']}
93
+ @langlinks = (page/:langlinks/:ll).collect{|ll| ll.attributes['lang']}
94
+ @images = (page/:images/:im).collect{|im| im.attributes['title']}
95
+ @templates = (page/:templates/:tl).collect{|tl| tl.attributes['title']}
96
+ @extlinks = (page/:extlinks/:el).collect{|el| el.inner_html}
97
+ @revisions = (page/:revisions/:rev).collect{|rev| Revision.new(rev)}
98
+ end
99
+ end
100
+
101
+ class Revision
102
+ attr_accessor *RVPROPS
103
+ attr_accessor :revid
104
+
105
+ def initialize(rev)
106
+ @revid = rev.attributes['revid']
107
+ @user = rev.attributes['user']
108
+ @timestamp = Time.parse(rev.attributes['timestamp'])
109
+ @comment = rev.attributes['comment']
110
+ @content = rev.inner_html
111
+ end
112
+ end
113
+
114
+ protected
115
+ def get_xml(url)
116
+ Hpricot.XML(open(url))
117
+ end
118
+ end
119
+
120
+
121
+ protected
122
+ def make_url(*opts)
123
+ @url + "?" + (["action=query", "format=xml"] + opts).join('&')
124
+ end
125
+
126
+ def handle_options(opts)
127
+ arr = opts.delete_if{|o| o.is_a? Hash}
128
+ hash = (opts - arr).first
129
+ [arr, handle_opts_hash(hash)]
130
+ end
131
+
132
+ def handle_opts_hash(opts)
133
+ opts ||= {}
134
+ res = []
135
+
136
+ opts[:prop] ||= PROPS
137
+ opts[:prop] = opts[:prop] & PROPS
138
+ res << make_qs("prop", opts[:prop])
139
+
140
+ if opts[:revids]
141
+ res << make_qs("revids", opts[:revids])
142
+ end
143
+
144
+ if opts[:rvprop]
145
+ opts[:rvprop] = opts[:rvprop] & RVPROPS
146
+ res << make_qs("rvprop", opts[:rvprop])
147
+ end
148
+
149
+ res
150
+ end
151
+
152
+ def make_qs(name, collection)
153
+ "#{name}=#{CGI.escape(collection.join('|'))}"
154
+ end
155
+
156
+ end
157
+
158
+
@@ -0,0 +1,28 @@
1
+ require 'mediawiki'
2
+
3
+ # The Wikipedia constant allows the use of Wikipedia's Query API from Ruby
4
+ # The wrapping is incomplete and the interface will be cleaned up as work is
5
+ # done.
6
+ #
7
+ # == Usage
8
+ #
9
+ # The simplest case is just finding pages by title. The Wikipedia API allows
10
+ # requests to be on multiple titles or ids, so this wrapping returns an array of
11
+ # pages
12
+ #
13
+ # require 'wikipedia'
14
+ # page = Wikipedia.find_by_titles('Foo').pages.first
15
+ # page.title #=> "Foo"
16
+ #
17
+ # Pages can also be found based on pageid
18
+ #
19
+ # page = Wikipedia.find_by_pageids(10).pages.first
20
+ # page.title #=> "AccessibleComputing"
21
+ #
22
+ # Further API options can be specified in the optional second parameter to
23
+ # find_by_*. This can be used to limit the fetching of unnecessary data
24
+ #
25
+ # page = Wikipedia.find_by_titles('Foo', :prop => [:langlinks]).pages.first
26
+ # page.langlinks #=> ["da", "fi", "it", "no", "sl", "vi"]
27
+ #
28
+ Wikipedia = MediaWiki.new("http://en.wikipedia.org/w/api.php")
@@ -0,0 +1,85 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <api>
3
+ <query>
4
+ <pages>
5
+ <page lastrevid="208372148" touched="2008-04-29T06:15:08Z" title="Foo" ns="0" length="3945" counter="0" pageid="9132808">
6
+ <revisions>
7
+ <rev timestamp="2008-04-26T19:07:10Z" user="Andreas Kaufmann" revid="208372148" comment="Removed category &quot;Computer programming&quot;; Quick-adding category &quot;Variable (computer programming)&quot; (using [[WP:HOTCAT|HotCat]])" minor=""/>
8
+ </revisions>
9
+ <links>
10
+ <pl title="2007" ns="0"/>
11
+ <pl title="Abstract entity" ns="0"/>
12
+ <pl title="Abstraction (computer science)" ns="0"/>
13
+ <pl title="Algebra" ns="0"/>
14
+ <pl title="Apt:foo" ns="0"/>
15
+ <pl title="Backronym" ns="0"/>
16
+ <pl title="Bar (computer science)" ns="0"/>
17
+ <pl title="Bill Gates" ns="0"/>
18
+ <pl title="Canonical" ns="0"/>
19
+ <pl title="Commands" ns="0"/>
20
+ <pl title="Computer programming" ns="0"/>
21
+ <pl title="Computer science" ns="0"/>
22
+ <pl title="Cruft" ns="0"/>
23
+ <pl title="Data" ns="0"/>
24
+ <pl title="Eric S. Raymond" ns="0"/>
25
+ <pl title="FUBAR" ns="0"/>
26
+ <pl title="Foo Camp" ns="0"/>
27
+ <pl title="Foo Fighter" ns="0"/>
28
+ <pl title="Foo Fighters" ns="0"/>
29
+ <pl title="Foobar" ns="0"/>
30
+ <pl title="Functions" ns="0"/>
31
+ <pl title="Hacker convention" ns="0"/>
32
+ <pl title="Internet Engineering Task Force" ns="0"/>
33
+ <pl title="Kludge" ns="0"/>
34
+ <pl title="List of Daffy Duck cartoons" ns="0"/>
35
+ <pl title="MIT Press" ns="0"/>
36
+ <pl title="Metasyntactic variable" ns="0"/>
37
+ <pl title="Neologism" ns="0"/>
38
+ <pl title="November 5" ns="0"/>
39
+ <pl title="O'Reilly Media" ns="0"/>
40
+ <pl title="Perl" ns="0"/>
41
+ <pl title="Pogo (comics)" ns="0"/>
42
+ <pl title="Pseudocode" ns="0"/>
43
+ <pl title="Request for Comments" ns="0"/>
44
+ <pl title="Smokey Stover" ns="0"/>
45
+ <pl title="Subroutine" ns="0"/>
46
+ <pl title="United States v. Microsoft" ns="0"/>
47
+ <pl title="Variable" ns="0"/>
48
+ <pl title="Variables" ns="0"/>
49
+ <pl title="Web Services Interoperability" ns="0"/>
50
+ <pl title="Wiktionary" ns="0"/>
51
+ </links>
52
+ <langlinks>
53
+ <ll lang="da">Foo (data)</ll>
54
+ <ll lang="fi">Foo</ll>
55
+ <ll lang="it">Foo</ll>
56
+ <ll lang="no">Foo</ll>
57
+ <ll lang="sl">Foo</ll>
58
+ <ll lang="vi">Foo</ll>
59
+ </langlinks>
60
+ <images>
61
+ <im title="Image:Wiktionary-logo-en.svg" ns="6"/>
62
+ </images>
63
+ <templates>
64
+ <tl title="Template:Cite book" ns="10"/>
65
+ <tl title="Template:Cite web" ns="10"/>
66
+ <tl title="Template:De icon" ns="10"/>
67
+ <tl title="Template:Languageicon" ns="10"/>
68
+ <tl title="Template:Reflist" ns="10"/>
69
+ <tl title="Template:Wiktionary" ns="10"/>
70
+ </templates>
71
+ <categories>
72
+ <cl title="Category:Placeholder names" ns="14"/>
73
+ <cl title="Category:Variable (computer programming)" ns="14"/>
74
+ </categories>
75
+ <extlinks>
76
+ <el>http://books.google.com/books?id=POlUJW3Z9McC&amp;pg=PA5&amp;dq=foo+jargon&amp;ei=GnIvR8PwGJiSpgK1qIT6CQ&amp;ie=ISO-8859-1&amp;sig=hIE0I8TtPGKUbSU-wgDTm4hQ8ig#PPA4,M1</el>
77
+ <el>http://foo-magazin.de/</el>
78
+ <el>http://tools.ietf.org/html/rfc3092</el>
79
+ <el>http://www.ietf.org/rfc/rfc3092.txt</el>
80
+ <el>http://www.news.com/Microsoft-ploy-to-block-Sun-exposed/2100-1001_3-912906.html</el>
81
+ </extlinks>
82
+ </page>
83
+ </pages>
84
+ </query>
85
+ </api>
@@ -0,0 +1,34 @@
1
+ require 'test/unit'
2
+ require 'rubygems'
3
+ require 'shoulda'
4
+ require File.dirname(__FILE__) + '/../lib/mediawiki'
5
+
6
+ class MediaWiki
7
+ class MediaWikiBase
8
+ def get_xml(url)
9
+ Hpricot.XML(open(File.dirname(__FILE__) + '/sample.xml'))
10
+ end
11
+ end
12
+ end
13
+
14
+ class MediaWikiTest < Test::Unit::TestCase
15
+ def setup
16
+ @mw = MediaWiki.new("http://mock.com/api.php")
17
+ end
18
+
19
+ context "MediaWiki interface" do
20
+ should("find article by id"){ assert @mw.find(10) }
21
+ should("find article by title"){ assert @mw.find_by_title("Foo") }
22
+ should("find articles by ids"){ assert @mw.find_by_pageids(10,11) }
23
+ should("find articles by titles"){ assert @mw.find_by_titles("Foo","Bar") }
24
+ end
25
+
26
+ context "MediaWiki base" do
27
+ should("have xml"){ assert @mw.find_by_titles("Foo").xml }
28
+ should("have pages"){ assert @mw.find_by_titles("Foo").pages }
29
+ end
30
+
31
+ context "MediaWiki pages" do
32
+ end
33
+ end
34
+
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: schleyfox-wikipedia-api
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Ben Hughes
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-05-06 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: ben@pixelmachine.org
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - README
26
+ - LICENSE
27
+ - lib/mediawiki.rb
28
+ - lib/wikipedia.rb
29
+ - test/sample.xml
30
+ - test/test.rb
31
+ has_rdoc: true
32
+ homepage: http://github.com/schleyfox/wikipedia-api/
33
+ post_install_message:
34
+ rdoc_options: []
35
+
36
+ require_paths:
37
+ - lib
38
+ required_ruby_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: "0"
43
+ version:
44
+ required_rubygems_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ version:
50
+ requirements: []
51
+
52
+ rubyforge_project:
53
+ rubygems_version: 1.0.1
54
+ signing_key:
55
+ specification_version: 2
56
+ summary: Wikipedia-API is a ruby wrapper for the MediaWiki API
57
+ test_files: []
58
+