wikiwhat 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +247 -0
- data/Rakefile +4 -0
- data/lib/wikiwhat/api_call.rb +19 -0
- data/lib/wikiwhat/page.rb +126 -0
- data/lib/wikiwhat/parse.rb +199 -0
- data/lib/wikiwhat/version.rb +3 -0
- data/lib/wikiwhat/wikierror.rb +4 -0
- data/lib/wikiwhat.rb +5 -0
- data/spec/api_call_spec.rb +60 -0
- data/spec/fixtures/vcr_cassettes/albert.yml +59 -0
- data/spec/fixtures/vcr_cassettes/image_url.yml +57 -0
- data/spec/fixtures/vcr_cassettes/kel_extract.yml +97 -0
- data/spec/fixtures/vcr_cassettes/kel_revisions.yml +165 -0
- data/spec/fixtures/vcr_cassettes/wikiwhat_run.yml +1668 -0
- data/spec/page_spec.rb +177 -0
- data/spec/parse_spec.rb +65 -0
- data/spec/spec_helper.rb +26 -0
- data/spec/testfiles/api_call_contents.rb +171 -0
- data/spec/testfiles/vcr_setup.rb +6 -0
- data/wikiwhat.gemspec +29 -0
- metadata +177 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: b93f5737ec0a7ba4f70a56855763cead8fd3ff8e
|
|
4
|
+
data.tar.gz: 9382f622d1bc3380c7e19e91e67211c15e9cedde
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 5e7ba1161a11bc7f2096ea652d56da49dbf3938d2f6de3f376a26c5f22aaa806071c1c68bc67e43c2334271a82e04e7b9a953a0631c0e0283acf9cf9b79eefce
|
|
7
|
+
data.tar.gz: e4c3fe3604a508f217795b97e8e6d0c7449415c932564f0a9f010b287693193927633d6f10f688c59f06677d7ec4c761fd77e1ac63adee32ae1e4098430cfc52
|
data/.gitignore
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
*.gem
|
|
2
|
+
*.rbc
|
|
3
|
+
.bundle
|
|
4
|
+
.config
|
|
5
|
+
.yardoc
|
|
6
|
+
Gemfile.lock
|
|
7
|
+
InstalledFiles
|
|
8
|
+
_yardoc
|
|
9
|
+
coverage
|
|
10
|
+
doc/
|
|
11
|
+
lib/bundler/man
|
|
12
|
+
pkg
|
|
13
|
+
rdoc
|
|
14
|
+
spec/reports
|
|
15
|
+
test/tmp
|
|
16
|
+
test/version_tmp
|
|
17
|
+
tmp
|
|
18
|
+
wikihug\ brainstorm
|
|
19
|
+
|
|
20
|
+
vendor/bundle
|
|
21
|
+
|
|
22
|
+
miscdatafiles/*
|
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
Copyright (c) 2013 Bonnie
|
|
2
|
+
|
|
3
|
+
MIT License
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
6
|
+
a copy of this software and associated documentation files (the
|
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
11
|
+
the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be
|
|
14
|
+
included in all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
Wikiwhat
|
|
2
|
+
========
|
|
3
|
+
|
|
4
|
+
A Ruby gem for extracting specific content from a [Wikipedia](http://wikipedia.com) article.
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
## Table of Contents
|
|
8
|
+
|
|
9
|
+
1. [Installation](#installation)
|
|
10
|
+
2. [Usage](#usage)
|
|
11
|
+
3. [Examples](#examples)
|
|
12
|
+
4. [Contribute](#contribute)
|
|
13
|
+
5. [Team](#team)
|
|
14
|
+
6. [License](#license)
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
Add this line to your application's Gemfile:
|
|
19
|
+
|
|
20
|
+
$ gem 'wikiwhat'
|
|
21
|
+
|
|
22
|
+
And then execute:
|
|
23
|
+
|
|
24
|
+
$ bundle
|
|
25
|
+
|
|
26
|
+
Or install it yourself as:
|
|
27
|
+
|
|
28
|
+
$ gem install wikiwhat
|
|
29
|
+
|
|
30
|
+
## Usage
|
|
31
|
+
|
|
32
|
+
This gem makes use of [Wikipedia](http://wikipedia.com), which is a Creative Commons resource. Please check out the [Wikipedia Copyright](http://en.wikipedia.org/wiki/Wikipedia:Copyrights) page for licensing information.
|
|
33
|
+
|
|
34
|
+
######To create a new Wikiwhat::Page object:
|
|
35
|
+
|
|
36
|
+
Each article to be queried should be set up as a new Wikiwhat::Page object. Queries are made via the Wikipedia API, which has limited redirect capabilities. Misspelled or ambiguous titles may return unexpected results.
|
|
37
|
+
|
|
38
|
+
```ruby
|
|
39
|
+
page = Wikiwhat::Page.new("<WIKIPEDIA ARTICLE TITLE>")
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Example:
|
|
43
|
+
|
|
44
|
+
```ruby
|
|
45
|
+
pigeon = Wikiwhat::Page.new("Columba livia")
|
|
46
|
+
albert = Wikiwhat::Page.new("Albert Einstein")
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
######Types of information available:
|
|
50
|
+
|
|
51
|
+
* Paragraph(s) in an article
|
|
52
|
+
* All paragraphs under a specific header
|
|
53
|
+
* A list of all image titles and corresponding URLs found in an article
|
|
54
|
+
* The sidebar image
|
|
55
|
+
* A list of all references found in an article
|
|
56
|
+
|
|
57
|
+
These methods must be done independently. We currently do not support setting multiple options at once in a single new Wikiwhat object. However, the individual methods can all be called on the same instance of Wikiwhat::Page.
|
|
58
|
+
|
|
59
|
+
######To get the first several paragraphs in an article:
|
|
60
|
+
|
|
61
|
+
```ruby
|
|
62
|
+
page = Wikiwhat::Page.new("<WIKIPEDIA ARTICLE TITLE>", :num_paragraphs => NUMBER)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
You can also call .paragraphs directly on your Wikiwhat::Page object.
|
|
66
|
+
|
|
67
|
+
```ruby
|
|
68
|
+
page.paragraphs(2)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
If no number is specified, `.paragraphs` defaults to `1` ie: the first paragraph on the page.
|
|
72
|
+
|
|
73
|
+
The paragraphs will be returned in an array with each paragraph as a String item in that array. The paragraphs are stored under the instance variable `@paragraphs`. Currently, the text will contain the HTML markup from the page. If more paragraphs than exist are requested, all paragraphs found on the page will be returned without errors.
|
|
74
|
+
|
|
75
|
+
Example 1:
|
|
76
|
+
|
|
77
|
+
```ruby
|
|
78
|
+
pigeon = Wikiwhat::Page.new("Columba livia", :num_paragraphs => 2)
|
|
79
|
+
|
|
80
|
+
=> ["<p>The <b>Rock Dove</b> (<i>Columba livia</i>) or <b>Rock Pigeon</b> is a member of the bird family Columbidae (doves and pigeons). In common usage, this bird is often simply referred to as the \"pigeon\".</p>",
|
|
81
|
+
"\n<p>The species includes the domestic pigeon (including the fancy pigeon), and escaped domestic pigeons have given rise to feral populations around the world.</p>"]
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Example 2:
|
|
85
|
+
|
|
86
|
+
```ruby
|
|
87
|
+
pigeon = Wikiwhat::Page.new("Columba livia")
|
|
88
|
+
pigeon.paragraphs
|
|
89
|
+
|
|
90
|
+
=> ["<p>The <b>Rock Dove</b> (<i>Columba livia</i>) or <b>Rock Pigeon</b> is a member of the bird family Columbidae (doves and pigeons). In common usage, this bird is often simply referred to as the \"pigeon\".</p>"]
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
######To get all the paragraphs under a specific header:
|
|
94
|
+
|
|
95
|
+
```ruby
|
|
96
|
+
page = Wikiwhat::Page.new("<WIKIPEDIA ARTICLE TITLE>", :header => "<Header>")
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
You can also call `.header` directly on your Wikiwhat::Page object.
|
|
100
|
+
|
|
101
|
+
```ruby
|
|
102
|
+
page.header("<Header>")
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
If you supply a header that is not found on the page, you will get a WikiwhatError, which is a subclass of StandardError.
|
|
106
|
+
|
|
107
|
+
The section under the supplied Header will be returned as a String. The section is stored under the instance variable `@header`. Currently, the text will contain HTML markup.
|
|
108
|
+
|
|
109
|
+
Example 1:
|
|
110
|
+
|
|
111
|
+
```ruby
|
|
112
|
+
pigeon = Wikiwhat::Page.new("Columba livia", :header => "Description")
|
|
113
|
+
|
|
114
|
+
=> "\n\n<p>The adult of the nominate subspecies of the Rock Dove is 29 to 37 cm (11 to 15 in) long with a 62 to 72 cm (24 to 28 in) wingspan. Weight for wild or feral Rock Doves ranges from 238–380 g (8.4–13 oz), though . . ."
|
|
115
|
+
```
|
|
116
|
+
Example 2:
|
|
117
|
+
|
|
118
|
+
```ruby
|
|
119
|
+
pigeon = Wikiwhat::Page.new("Columba livia")
|
|
120
|
+
pigeon.header("Predators")
|
|
121
|
+
|
|
122
|
+
=> "\n<p>With only its flying abilities protecting it from predation, rock pigeons are a favorite almost around the world for a wide range of raptorial birds. In fact, with feral pigeons existing in most every city in the world, they may form the majority of prey for several . . ."
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
######To get a list of all the images on a page, plus their URLs:
|
|
126
|
+
|
|
127
|
+
```ruby
|
|
128
|
+
page = Wikiwhat::Page.new("<WIKIPEDIA ARTICLE TITLE>", :img_list => true)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
You can also call `.image_list` directly on your Wikiwhat::Page object.
|
|
132
|
+
|
|
133
|
+
```
|
|
134
|
+
page.image_list
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
This method will return a hash with two keys: `urls` and `titles`. Each key points to an array of strings. Currently, the Wikipedia logo image is included in the list. The hash is stored under the instance variable `@image_list`.
|
|
138
|
+
|
|
139
|
+
Example 1:
|
|
140
|
+
|
|
141
|
+
```ruby
|
|
142
|
+
pigeon = Wikiwhat::Page.new("Columba livia", :img_list => true)
|
|
143
|
+
|
|
144
|
+
=> {:urls=>
|
|
145
|
+
["http://upload.wikimedia.org/wikipedia/commons/4/43/Blue_Rock_Pigeon_%28Columba_livia%29_in_Kolkata_I_IMG_9762.jpg",
|
|
146
|
+
"http://upload.wikimedia.org/wikipedia/commons/e/ec/Blue_Rock_Pigeon_I4_IMG_3038.jpg",
|
|
147
|
+
"http://upload.wikimedia.org/wikipedia/commons/9/98/Columba_livia_1_day_old.jpg",
|
|
148
|
+
"http://upload.wikimedia.org/wikipedia/commons/c/ce/Columba_livia_22_days_old.jpg",
|
|
149
|
+
"http://upload.wikimedia.org/wikipedia/commons/f/ff/Columba_livia_distribution_map.png",
|
|
150
|
+
"http://upload.wikimedia.org/wikipedia/commons/d/d4/Columba_livia_nest_2_eggs.jpg",
|
|
151
|
+
"http://upload.wikimedia.org/wikipedia/commons/4/4b/Doves_fighting.JPG",
|
|
152
|
+
"http://upload.wikimedia.org/wikipedia/commons/1/1f/Feral_Rock_Dove_nest_with_chicks.jpg",
|
|
153
|
+
"http://upload.wikimedia.org/wikipedia/commons/d/d3/Fly_June_2008-2.jpg",
|
|
154
|
+
"http://upload.wikimedia.org/wikipedia/en/4/4a/Commons-logo.svg"],
|
|
155
|
+
:titles=>
|
|
156
|
+
["File:Blue Rock Pigeon (Columba livia) in Kolkata I IMG 9762.jpg",
|
|
157
|
+
"File:Blue Rock Pigeon I4 IMG 3038.jpg",
|
|
158
|
+
"File:Columba livia 1 day old.jpg",
|
|
159
|
+
"File:Columba livia 22 days old.jpg",
|
|
160
|
+
"File:Columba livia distribution map.png",
|
|
161
|
+
"File:Columba livia nest 2 eggs.jpg",
|
|
162
|
+
"File:Doves fighting.JPG",
|
|
163
|
+
"File:Feral Rock Dove nest with chicks.jpg",
|
|
164
|
+
"File:Fly June 2008-2.jpg",
|
|
165
|
+
"File:Commons-logo.svg"]}
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
Example 2:
|
|
169
|
+
|
|
170
|
+
```ruby
|
|
171
|
+
pigeon = Wikiwhat::Page.new("Columba livia")
|
|
172
|
+
pigeon.image_list
|
|
173
|
+
|
|
174
|
+
=> {:urls=>
|
|
175
|
+
["http://upload.wikimedia.org/wikipedia/commons/4/43/Blue_Rock_Pigeon_%28Columba_livia%29_in_Kolkata_I_IMG_9762.jpg",
|
|
176
|
+
"http://upload.wikimedia.org/wikipedia/commons/e/ec/Blue_Rock_Pigeon_I4_IMG_3038.jpg",
|
|
177
|
+
"http://upload.wikimedia.org/wikipedia/commons/9/98/Columba_livia_1_day_old.jpg",
|
|
178
|
+
"http://upload.wikimedia.org/wikipedia/commons/c/ce/Columba_livia_22_days_old.jpg", ...
|
|
179
|
+
]
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
######To get the sidebar image:
|
|
183
|
+
|
|
184
|
+
```ruby
|
|
185
|
+
page = Wikiwhat::Page.new("<WIKIPEDIA ARTICLE TITLE>", :sidebar_img => true)
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
You can also call `.sidebar_image` directly on your Wikiwhat::Page object.
|
|
189
|
+
|
|
190
|
+
```ruby
|
|
191
|
+
page.sidebar_image
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
This method returns the url of the sidebar image as a String. The url is stored under the instance variable `@sidebar_image`.
|
|
195
|
+
|
|
196
|
+
######To get a list of all the references on a page:
|
|
197
|
+
|
|
198
|
+
Here there be dragons. This method is a work in progress and only works on some pages.
|
|
199
|
+
|
|
200
|
+
```ruby
|
|
201
|
+
page = Wikiwhat::Page.new("<WIKIPEDIA ARTICLE TITLE>", :refs => true)
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
You can also call `.ref_list` directly on your Wikiwhat::Page object.
|
|
205
|
+
|
|
206
|
+
```ruby
|
|
207
|
+
page.ref_list
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
This method will return an array. Each reference will be returned as a string nested inside another array. These strings will contain wiki markup. The array is stored under the instance variable `@ref_list`.
|
|
211
|
+
|
|
212
|
+
Example 1:
|
|
213
|
+
|
|
214
|
+
```ruby
|
|
215
|
+
albert = Wikiwhat::Page.new("Albert Einstein", :refs => true)
|
|
216
|
+
|
|
217
|
+
=> [
|
|
218
|
+
["Zahar, Élie (2001), ''Poincaré's Philosophy. From Conventionalism to Phenomenology'', Carus Publishing Company, [http://books.google.com/?id=jJl2JAqvoSAC
|
|
219
|
+
["{{cite doi|10.1098/rsbm.1955.0005}}"],
|
|
220
|
+
["David Bodanis, ''E = mc<sup>2</sup>: A Biography of the World's Most Famous Equation'' (New York: Walker, 2000)."],
|
|
221
|
+
["{{cite web |url=http://nobelprize.org/nobel_prizes/physics/laureates/1921/ |title=The Nobel Prize in Physics 1921 |accessdate=6 March 2007 |publisher=[[N
|
|
222
|
+
["[http://wordnetweb.princeton.edu/perl/webwn?s=Einstein WordNet for Einstein]."],
|
|
223
|
+
["{{cite web|url=http://www.einstein-website.de/z_information/faq-e.html|title=Frequently asked questions|publisher=einstein-website.de|accessdate=23 July
|
|
224
|
+
["{{cite web|url=http://www.beinglefthanded.com/Left-Handed-Einstein.html|title=Left Handed Einstein|publisher=Being Left Handed.com|accessdate=23 July 201
|
|
225
|
+
["{{Citation |first=P. A. |last=Schilpp (Ed.) |title=Albert Einstein – Autobiographical Notes |pages=8–9 |publisher=[[Open Court Publishing Company]]
|
|
226
|
+
["M. Talmey, ''The Relativity Theory Simplified and the Formative Period of its Inventor''. Falcon Press, 1932, pp. 161–164."], . . . ]
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
## Contribute
|
|
230
|
+
|
|
231
|
+
1. Fork it
|
|
232
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
|
233
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
|
234
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
|
235
|
+
5. Create new Pull Request
|
|
236
|
+
|
|
237
|
+
## Team
|
|
238
|
+
|
|
239
|
+
[Bonnie Mattson](https://github.com/kitsunetsuki)
|
|
240
|
+
|
|
241
|
+
[Clare Glinka](https://github.com/cglinka)
|
|
242
|
+
|
|
243
|
+
## License
|
|
244
|
+
|
|
245
|
+
Gem: MIT. Fly free, little birds!
|
|
246
|
+
|
|
247
|
+
Again, all content supplied by the use of this gem is supplied by [Wikipedia](http://wikipedia.com). Please check out the [Wikipedia Copyright](http://en.wikipedia.org/wiki/Wikipedia:Copyrights) page for licensing information.
|
data/Rakefile
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
require 'open-uri'
|
|
2
|
+
require 'rest_client'
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
5
|
+
module Wikiwhat
  # Thin client for the English Wikipedia `api.php` query endpoint.
  class Call
    # Base URL every request is sent to.
    API_ENDPOINT = "http://en.wikipedia.org/w/api.php"

    # Characters that must be percent-encoded inside the `titles=` value.
    # This is the set the removed `URI.encode` used to escape, plus `&` and
    # `+`: left raw, `&` ends the `titles` parameter early and `+` is read
    # back by the server as a space (so "AT&T" and "C++" were mis-requested).
    UNSAFE_TITLE_CHARS = /[^\-_.!~*'()a-zA-Z0-9;\/?:@=$,\[\]]/

    # Call the API and parse the returning JSON object.
    #
    # title   - the article (or "File:..." page) title as a String.
    # options - Hash of request switches, see .build_url.
    #
    # Returns the parsed JSON response (a Hash).
    def self.call_api(title, options={})
      JSON.parse(RestClient.get(build_url(title, options)))
    end

    # Make a string that is the URL for the API call.
    #
    # title   - the page title as a String; it is percent-encoded here.
    # options - Hash of request switches:
    #           :prop     - value for the `prop` parameter (e.g. "extracts").
    #           :rvprop   - truthy to append `rvprop=content`.
    #           :img_list - truthy to append `generator=images`.
    #           :iiprop   - truthy to append `iiprop=url`.
    #
    # Returns the request URL as a String.
    def self.build_url(title, options={})
      prop     = options[:prop]     ? "&prop=#{options[:prop]}" : ''
      rvprop   = options[:rvprop]   ? "&rvprop=content"         : ''
      img_list = options[:img_list] ? "&generator=images"       : ''
      iiprop   = options[:iiprop]   ? "&iiprop=url"             : ''

      "#{API_ENDPOINT}?action=query#{prop}&titles=#{escape_title(title)}" \
        "&format=json&redirects#{img_list}#{rvprop}#{iiprop}"
    end

    # Percent-encode every byte of each unsafe character in a title.
    #
    # Returns the escaped title as a String.
    def self.escape_title(title)
      title.to_s.gsub(UNSAFE_TITLE_CHARS) do |char|
        char.bytes.map { |byte| format('%%%02X', byte) }.join
      end
    end
  end
end
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
require 'json'
|
|
2
|
+
require 'wikiwhat/parse'
|
|
3
|
+
require 'wikiwhat/api_call'
|
|
4
|
+
|
|
5
|
+
module Wikiwhat
  # One Wikipedia article and the pieces of information extracted from it.
  class Page
    # :img_list and :sidebar_img_url are kept for compatibility; nothing in
    # this class assigns them. Readers for header, image_list, ref_list and
    # paragraphs are the explicit methods below.
    attr_reader :head, :title, :img_list, :sidebar_img_url

    # Set title of article and type of information requested.
    #
    # title   - the title of the requested article as a String
    # options - Hash selecting what to fetch immediately:
    #           :img_list       - run #image_list.
    #           :header         - the desired section header as a String.
    #           :refs           - run #ref_list.
    #           :sidebar_img    - run #sidebar_image.
    #           :num_paragraphs - the number of paragraphs as an Integer
    #                             (:paragraphs is accepted as an alias).
    #           :sidebar        - TODO: sidebar contents; currently fetches
    #                             the sidebar image.
    def initialize(title, options={})
      @title = title
      run(options)
    end

    # Iterates over the options hash and runs the matching method for each
    # recognised key. Unknown keys are ignored.
    #
    # hash - options hash
    #
    # Returns the hash that was passed in.
    def run(hash)
      hash.each do |key, value|
        case key
        when :img_list
          image_list
        when :header
          header(value)
        when :refs
          ref_list
        when :sidebar_img, :sidebar
          sidebar_image
        when :num_paragraphs, :paragraphs
          paragraphs(value)
        end
      end
    end

    # Paragraphs from the top of the article.
    #
    # value - the number of paragraphs wanted. When omitted, the most
    #         recently fetched paragraphs are returned, or the first
    #         paragraph if none were fetched yet.
    #
    # Results are cached per requested count, so asking for a different
    # count no longer returns the first answer.
    #
    # Returns an Array of Strings.
    def paragraphs(value = nil)
      return @paragraphs if value.nil? && @paragraphs

      count = value || 1
      @paragraphs_by_count ||= {}
      @paragraphs = (@paragraphs_by_count[count] ||= find_paragraphs(count))
    end

    # Returns the memoized Hash produced by Media#list_images.
    def image_list
      @image_list ||= find_image_list
    end

    # Content under a section header.
    #
    # header - the header as a String. When omitted, the most recently
    #          fetched section is returned (nil if none was fetched).
    #
    # Results are cached per header name.
    #
    # Returns whatever Text#find_header returns for that header.
    def header(header = nil)
      return @header if header.nil?

      @headers_by_name ||= {}
      @header = (@headers_by_name[header] ||= find_header(header))
    end

    # Returns the memoized result of Text#refs.
    def ref_list
      @ref_list ||= find_ref_list
    end

    # Returns the memoized result of Text#sidebar_image.
    def sidebar_image
      @sidebar_image ||= find_sidebar_image
    end

    private

    # Finds the specified number of paragraphs on a page starting at the top.
    #
    # Returns an array of strings
    def find_paragraphs(paras)
      @paras = paras
      @paragraphs = Text.new(extract_contents).paragraph(@paras)
    end

    # Finds all the media items on a page.
    #
    # Returns the result of Media#list_images.
    def find_image_list
      api_contents = Call.call_api(@title, :img_list => true)
      @image_list = Media.new(api_contents, 'pages').list_images
    end

    # Find a header.
    #
    # Returns the content under the given header.
    def find_header(head)
      @head = head
      @header = Text.new(extract_contents).find_header(@head)
    end

    # Find all the references on a page.
    #
    # Returns the result of Text#refs.
    def find_ref_list
      @ref_list = Text.new(revision_contents, 'revisions').refs
    end

    # Find the sidebar image, if one exists.
    #
    # Returns the result of Text#sidebar_image.
    def find_sidebar_image
      @sidebar_image = Text.new(revision_contents, 'revisions').sidebar_image
    end

    # API response for prop=extracts, fetched once per Page and shared by
    # the paragraph and header lookups.
    def extract_contents
      @extract_contents ||= Call.call_api(@title, :prop => "extracts")
    end

    # API response for prop=revisions, fetched once per Page and shared by
    # the reference and sidebar-image lookups.
    def revision_contents
      @revision_contents ||= Call.call_api(@title, :prop => "revisions", :rvprop => true)
    end
  end
end
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
module Wikiwhat
  # Raised when requested content cannot be found in an article.
  class WikiwhatError < StandardError
  end

  # Base class providing a recursive key lookup over nested hashes.
  class Results
    def initialize
      @result = nil
    end

    # Search a (possibly nested) Hash for a key.
    #
    # hash - the Hash to search; anything else yields nil.
    # key  - the key to look for.
    #
    # A key present at the top level wins. Otherwise nested Hash values are
    # searched depth-first and, if several contain the key, the last one in
    # traversal order wins (same as the previous implementation). Unlike
    # before, a miss returns nil instead of the value found by an earlier
    # call.
    #
    # Returns the value stored under key, or nil when it is absent.
    def pull_from_hash(hash, key)
      found, value = deep_lookup(hash, key)
      @result = found ? value : nil
    end

    private

    # Returns a two-element Array: [true, value] on a hit, [false, nil]
    # otherwise. The flag distinguishes a stored nil from a miss.
    def deep_lookup(hash, key)
      return [false, nil] unless hash.is_a?(Hash)
      return [true, hash[key]] if hash.key?(key)

      hit = [false, nil]
      hash.each_value do |child|
        candidate = deep_lookup(child, key)
        hit = candidate if candidate.first
      end
      hit
    end
  end

  # Extract portions of text from Wiki article
  class Text < Results
    # One chunk of text up to and including a closing paragraph tag.
    PARAGRAPH_PATTERN = /.*?<\/p>/m

    # "image = <file name>" up to a known image extension, on one line.
    # Capture 1 is the file name.
    SIDEBAR_IMAGE_PATTERN = /image[ \t]*=[ \t]*(.*?\w\.(?:jpe?g|png|gif|svg|tiff?)\b)/i

    # A <ref>...</ref> pair, with or without attributes on the opening tag.
    # Self-closing <ref ... /> tags are skipped. Capture 1 is the content.
    REFERENCE_PATTERN = /<ref(?:\s[^>]*)?(?<!\/)>(.*?)<\/ref>/m

    # api_return - the parsed API response Hash.
    # prop       - the key whose value holds the article text. When that
    #              value is an Array, the text is read from the "*" key of
    #              its first element.
    def initialize(api_return, prop='extract')
      @request = pull_from_hash(api_return, prop)
      if @request.is_a?(Array)
        @request = pull_from_hash(@request[0], "*")
      end
    end

    # Returns the requested number of paragraphs of a Wiki article
    #
    # quantity - the Number of paragraphs to be returned starting from the
    #            top of the article. Zero or less returns an empty Array.
    #
    # Each element runs up to and including a "</p>"; text after the last
    # "</p>" is not returned. If more paragraphs are requested than exist,
    # all of them are returned.
    #
    # Return an array of strings.
    def paragraph(quantity)
      wanted = [quantity.to_i, 0].max
      article_text.scan(PARAGRAPH_PATTERN).first(wanted)
    end

    # Find all content under a given heading
    #
    # header - the name of the header as a String. It must appear inside
    #          an <h2> element; matching is case-sensitive.
    #
    # Return a String running from the end of that heading to the next
    # "<h2" or, for the last section, to the end of the text.
    #
    # Raises Wikiwhat::WikiwhatError if no <h2> contains the header.
    def find_header(header)
      text = article_text
      heading = /<h2\b[^>]*>(?:(?!<\/h2>).)*?#{Regexp.escape(header)}(?:(?!<\/h2>).)*?<\/h2>/m
      match = heading.match(text)
      unless match
        raise Wikiwhat::WikiwhatError.new("Sorry, that header isn't on this page.")
      end

      body_start = match.end(0)
      body_end = text.index("<h2", body_start) || text.length
      text[body_start...body_end]
    end

    # Removes HTML tags from a String
    #
    # string - a String that contains HTML tags.
    #
    # Returns the string without HTML tags.
    def only_text(string)
      string.gsub(/<\/?.*?>/, '')
    end

    # Return the text from the sidebar, if one exists
    # def sidebar
    #   @sidebar = content_split(0)
    # end

    # Find the image from the sidebar, if one exists
    #
    # Return the url of the image as a String.
    #
    # Raises Wikiwhat::WikiwhatError if no image name is found before the
    # first "'''" or if the API response carries no url for it.
    def sidebar_image
      image_name = content_split(0).to_s[SIDEBAR_IMAGE_PATTERN, 1]
      unless image_name
        raise Wikiwhat::WikiwhatError.new("Sorry, it looks like there is no sidebar image on this page.")
      end

      # Call Wikipedia for image url
      get_url = Wikiwhat::Call.call_api("File:#{image_name}", :prop => "imageinfo", :iiprop => true)
      img_array = pull_from_hash(pull_from_hash(get_url, "pages"), "imageinfo")
      url = img_array.is_a?(Array) && img_array[0].is_a?(Hash) ? img_array[0]["url"] : nil
      unless url
        raise Wikiwhat::WikiwhatError.new("Sorry, the url for the sidebar image could not be found.")
      end
      url
    end

    # Find all references on a page.
    #
    # Scans everything after the first "'''" (previously only the two
    # chunks following it were scanned).
    #
    # Return all references as an array of one-element arrays, still in
    # wiki markup.
    #
    # TODO: Currently nested array, want to return as array of strings.
    def refs
      content_split(1, -1).scan(REFERENCE_PATTERN)
    end

    private

    # Splits the content on "'''" into side bar and everything else.
    # This method is for Parsing methods that use the raw markup from the
    # revisions call.
    # Specify start as 0 for sidebar content, for everything else specify
    # 'content_split(1, -1)'
    #
    # Returns the chunk at start (nil if there is none) when finish is
    # omitted, otherwise the chunks start..finish joined into one String
    # (empty when the range is out of bounds).
    #
    # TODO:split the content from the category info
    def content_split(start, finish=nil)
      chunks = article_text.split("'''")
      return chunks[start] if finish.nil?

      Array(chunks[start..finish]).join
    end

    # Returns the article text, raising Wikiwhat::WikiwhatError when the
    # API response did not contain any.
    def article_text
      return @request if @request.is_a?(String)

      raise Wikiwhat::WikiwhatError.new("Sorry, no content was returned for this page.")
    end
  end

  # Extract media information from an API response.
  class Media < Results
    attr_reader :api_return

    # api_return - the parsed API response Hash.
    # prop       - the key holding the Hash of image pages.
    def initialize(api_return, prop)
      @api_return = api_return
      @request = pull_from_hash(api_return, prop)
    end

    # Find all the media items on a page.
    #
    # Makes one API call per image title to look up its url. An image
    # whose response has no url gets nil, so both arrays stay aligned.
    #
    # Return a hash containing an array of urls and an array of titles;
    # both are empty when the response listed no images.
    def list_images
      pages = @request.is_a?(Hash) ? @request : {}
      titles = pages.values.map { |page| page["title"] }
      urls = titles.map { |title| image_url(title) }

      { urls: urls, titles: titles }
    end

    private

    # Ask the API for the url of a single image title.
    #
    # Returns the url as a String, or nil when the response has none.
    def image_url(title)
      response = Wikiwhat::Call.call_api(title, :prop => "imageinfo", :iiprop => true)
      info = pull_from_hash(pull_from_hash(response, "pages"), "imageinfo")
      info.is_a?(Array) && info[0].is_a?(Hash) ? info[0]["url"] : nil
    end
  end
end
|