mediawiki-keiki 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,16 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ # gem 'httparty'
6
+ # gem 'wikicloth'
7
+ # gem 'nokogiri'
8
+
9
+ # group :test do
10
+ # gem 'webmock'
11
+ # gem 'vcr'
12
+ # gem 'turn'
13
+ # gem 'rake'
14
+ # gem 'pry'
15
+ # gem 'minitest-reporters'
16
+ # end
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2010-2014 Google, Inc. http://angularjs.org
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,14 @@
1
+ # MediaWiki-Keiki
2
+
3
+ A Ruby API client for the [MediaWiki API](http://www.mediawiki.org/wiki/API:Main_page)
4
+
5
+ ## Features
6
+
7
+
8
+ ## Links
9
+
10
+
11
+ ## Installation
12
+
13
+
14
+ ## Usage
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require 'rake/testtask'
2
+
3
+ Rake::TestTask.new do |t|
4
+ t.test_files = FileList['spec/lib/mediawiki-keiki/*_spec.rb']
5
+ t.verbose = true
6
+ end
7
+
8
+ task :default => :test
@@ -0,0 +1,48 @@
1
module MediaWiki

  # Wraps the hash describing a single page and exposes its top-level keys
  # (for example "title" or "revisions") as reader methods.
  class Page

    attr_reader :page_data

    # hash - the page data; keys are looked up by their String name.
    def initialize(hash)
      @page_data = hash
    end

    # Returns the value stored under the key named like the called method.
    # Falls through to the default behaviour (NoMethodError) when the page
    # hash has no such key.
    def method_missing(name, *args, &block)
      key = name.to_s
      @page_data.has_key?(key) ? @page_data[key] : super
    end

    # Keeps respond_to? consistent with method_missing: every key of the page
    # hash is reported as a method the page responds to.
    def respond_to_missing?(name, include_private = false)
      @page_data.has_key?(name.to_s) || super
    end

    # Gets the content by looking inside the 'revisions' key in the page hash.
    # Raises if the page hash has no 'revisions' key.
    def content
      revisions[0]["*"]
    end

    # Uses the WikiCloth gem to convert the content from WikiMarkup to HTML
    def to_html
      WikiCloth::Parser.new( :data => content ).to_html
    end

    # Converts the content to plain text by stripping the markup from the HTML
    def to_text
      Nokogiri::HTML(to_html).text
    end

    # Returns a short summary built from the leading lines of the plain text.
    # Lines are appended until the summary is longer than 140 characters or
    # the text runs out, so a short page yields a shorter summary and an
    # empty page yields an empty String.
    def summary
      lines = to_text.split("\n")

      # lines is empty when the text is empty; nil.to_s gives "" in that case.
      # dup so that appending never touches a frozen or shared String.
      text = lines.shift.to_s.dup

      lines.each do |line|
        break if text.length > 140
        text << "\n" << line
      end

      text
    end

  end

end
@@ -0,0 +1,56 @@
1
module MediaWiki

  # Runs a single titles query (one or more titles separated by "|") against
  # the API endpoint configured below and exposes the result as Page objects.
  class Query

    attr_accessor :query

    # Only the writer is generated here; the reader is defined below because
    # it fetches and caches the result on first use.
    attr_writer :query_result

    include HTTParty

    # Sets the User-Agent header for the HTTP GET request
    headers 'User-Agent' => 'crawler/1.0 (https://github.com/mkulumadzi/crawler)'

    # Sets the base-uri to the Wikimedia API endpoint
    base_uri 'https://en.wikipedia.org'


    # query - String of page titles separated by "|".
    #
    # Raises ArgumentError when the query holds more than 50 titles.
    def initialize(query)

      # The WikiMedia API requires that requests be limited to 50 titles or
      # less; 50 titles are separated by 49 pipes.
      raise ArgumentError, "Query exceeds WikiMedia maximum number of sites (50)" if query.count("|") >= 50

      @query = query
      @page_hash = Hash.new

    end

    # Returns the API response, fetching it on first use.
    # Unless force is true, the cached result is reused. The cache is only
    # replaced once a forced request has succeeded.
    def query_result(force = false)
      if force || !@query_result
        @query_result = get_query_result
      end

      @query_result
    end

    # Returns a hash of Page objects keyed by the original query terms
    def pages

      # Captures the original query and sorts it, for using as keys with the hash
      key_array = @query.split('|').sort

      # NOTE(review): terms and pages are paired purely by position, which
      # assumes the response lists its pages in the same order as the sorted
      # query terms - confirm against the API response.
      query_result["query"]["pages"].each_with_index do |(_page_id, page_data), index|
        @page_hash[key_array[index]] = MediaWiki::Page.new(page_data)
      end

      @page_hash
    end

    private

    # Performs the HTTP GET request for the current query.
    # Called when the query_result has not been retrieved yet or a refresh is forced.
    def get_query_result
      path = "/w/api.php?continue=&format=json&action=query&titles=#{@query}&prop=revisions&rvprop=content&redirects"

      # URI.encode was removed in Ruby 3.0; the parser's escape method applies
      # the same escaping, so the requested URL is unchanged.
      # NOTE(review): like URI.encode, this leaves "&" and "=" unescaped, so a
      # title containing them would alter the query string - verify whether
      # such titles need to be supported.
      self.class.get URI::DEFAULT_PARSER.escape(path)
    end

  end

end
@@ -0,0 +1,7 @@
1
+ require 'httparty'
2
+ require 'wikicloth'
3
+ require 'nokogiri'
4
+
5
# Loads every Ruby file in the mediawiki-keiki directory next to this file.
# The list is sorted because Dir[] does not guarantee an order on every
# Ruby version and platform; sorting keeps the load order deterministic.
Dir[File.join(File.dirname(__FILE__), 'mediawiki-keiki', '*.rb')].sort.each do |file|
  require file
end
@@ -0,0 +1,84 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ describe MediaWiki::Page do
4
+
5
+ describe "Retrieve a page" do
6
+
7
+ let(:page) { MediaWiki::Query.new('foo').pages['foo'] }
8
+
9
+ before do
10
+ VCR.insert_cassette 'page', :record => :new_episodes
11
+ end
12
+
13
+ after do
14
+ VCR.eject_cassette
15
+ end
16
+
17
+ describe "get valid page" do
18
+
19
+ it "must get a valid page from WikiQuery pages" do
20
+ page.must_be_instance_of MediaWiki::Page
21
+ end
22
+
23
+ it "must store the page data as a hash" do
24
+ page.page_data.must_be_instance_of Hash
25
+ end
26
+
27
+ it "must return the page title" do
28
+ page.title.must_equal "Foobar"
29
+ end
30
+
31
+ it "must raise method missing if attribute is not present" do
32
+ lambda { page.foo_attribute }.must_raise NoMethodError
33
+ end
34
+
35
+ it "must not point to a redirect" do
36
+ /REDIRECT/.match(page.content).must_equal nil
37
+ end
38
+
39
+ it "must return the page content" do
40
+ page.content.must_be_instance_of String
41
+ end
42
+
43
+ it "must parse the page content as html" do
44
+ page.to_html.include?('<a href=').must_equal true
45
+ end
46
+
47
+ it "must parse the page content to plain text" do
48
+ page.to_text.include?('<a href=').must_equal false
49
+ end
50
+
51
+ it "must return a summary of the text" do
52
+ page.summary.must_be_instance_of String
53
+ end
54
+
55
+ it "must return at least 140 characters in the short summary" do
56
+ assert_operator(page.summary.length, :>=, 140)
57
+ end
58
+
59
+ end
60
+
61
+ describe "missing page" do
62
+
63
+ let(:page) { MediaWiki::Query.new('Foolicious').pages['Foolicious'] }
64
+
65
+ it "must be flagged as missing" do
66
+ page.missing.must_equal ""
67
+ end
68
+
69
+ end
70
+
71
+ describe "get invalid page" do
72
+
73
+ let(:page) { MediaWiki::Query.new('Talk:').pages['Talk:'] }
74
+
75
+ it "must be flagged as invalid" do
76
+ page.invalid.must_equal ""
77
+ end
78
+
79
+ end
80
+
81
+ end
82
+
83
+ end
84
+
@@ -0,0 +1,135 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ describe MediaWiki::Query do
4
+
5
+ describe "default attributes" do
6
+
7
+ it "must include httparty methods" do
8
+ MediaWiki::Query.must_include HTTParty
9
+ end
10
+
11
+ it "must have the base url set to the Wikipedia API endpoint" do
12
+ MediaWiki::Query.base_uri.must_equal 'https://en.wikipedia.org'
13
+ end
14
+
15
+ it "must have the User-Agent header" do
16
+ MediaWiki::Query.headers["User-Agent"].must_equal "crawler/1.0 (https://github.com/mkulumadzi/crawler)"
17
+ end
18
+
19
+ end
20
+
21
+ describe "default instance attributes" do
22
+
23
+ let(:wiki_query) { MediaWiki::Query.new('foo')}
24
+
25
+ it "must have a query" do
26
+ wiki_query.must_respond_to :query
27
+ end
28
+
29
+ it "must have the right query" do
30
+ wiki_query.query.must_equal 'foo'
31
+ end
32
+
33
+ end
34
+
35
+ describe "GET site" do
36
+
37
+ let(:wiki_query) { MediaWiki::Query.new('foo') }
38
+
39
+ before do
40
+ VCR.insert_cassette 'wiki_query', :record => :new_episodes
41
+ end
42
+
43
+ after do
44
+ VCR.eject_cassette
45
+ end
46
+
47
+ it "records the fixture" do
48
+ MediaWiki::Query.get('/w/api.php?continue=&format=json&action=query&titles=foo&prop=revisions&rvprop=content&redirects')
49
+ end
50
+
51
+ it "must have a query result method" do
52
+ wiki_query.must_respond_to :query_result
53
+ end
54
+
55
+ it "must parse the api response from JSON to Hash" do
56
+ wiki_query.query_result.must_be_instance_of Hash
57
+ end
58
+
59
+ it "must perform the request and get data" do
60
+ wiki_query.query_result["batchcomplete"].must_equal ""
61
+ end
62
+
63
+ describe "dynamic attributes" do
64
+
65
+ before do
66
+ wiki_query.query_result
67
+ end
68
+
69
+ it "must return the pages as a hash" do
70
+ wiki_query.pages.must_be_instance_of Hash
71
+ end
72
+
73
+ it "must use keys in pages that match the original search terms" do
74
+ wiki_query.pages.keys[0].must_equal 'foo'
75
+ end
76
+
77
+ it "must store the pages as Site classes" do
78
+ wiki_query.pages['foo'].must_be_instance_of MediaWiki::Page
79
+ end
80
+
81
+ end
82
+
83
+ describe "caching" do
84
+
85
+ # Use Webmock to make every further request time out after the query result has been fetched once
86
+ before do
87
+ wiki_query.query_result
88
+ stub_request(:any, /en.wikipedia.org/).to_timeout
89
+ end
90
+
91
+ it "must cache the query result" do
92
+ wiki_query.query_result.must_be_instance_of Hash
93
+ end
94
+
95
+ it "must refresh the profile if forced" do
96
+ lambda { wiki_query.query_result(true) }.must_raise Timeout::Error
97
+ end
98
+
99
+ end
100
+
101
+ describe "multiple sites" do
102
+
103
+ let(:wiki_query) { MediaWiki::Query.new('foo|bar|camp') }
104
+
105
+ it "must return all of the sites" do
106
+ wiki_query.pages.length.must_equal 3
107
+ end
108
+
109
+ it "must tag the sites with the right query terms" do
110
+ wiki_query.pages['foo'].title.must_equal 'Foobar'
111
+ wiki_query.pages['bar'].title.must_equal 'Bar'
112
+ wiki_query.pages['camp'].title.must_equal 'Camp'
113
+ end
114
+
115
+ end
116
+
117
+ describe "give warning if WikiMedia API limit of 50 sites exceeded" do
118
+
119
+ search_string = ""
120
+
121
+ ("1".."51").each { |x| search_string << x + "|"}
122
+
123
+ search_string = search_string.chomp("|")
124
+
125
+ it "must throw an error if search string has more than 50 sites" do
126
+ assert_raises ArgumentError do
127
+ MediaWiki::Query.new("#{search_string}".to_s)
128
+ end
129
+ end
130
+
131
+ end
132
+
133
+ end
134
+
135
+ end
@@ -0,0 +1,19 @@
1
+ #Load the MediaWiki file
2
+ require_relative '../lib/mediawiki-keiki'
3
+
4
+ #dependencies
5
+ require 'minitest/autorun'
6
+ require 'webmock/minitest'
7
+ require 'vcr'
8
+ require 'pry'
9
+ require 'minitest/reporters'
10
+
11
+ #Minitest reporter
12
+ reporter_options = { color: true}
13
+ Minitest::Reporters.use! [Minitest::Reporters::DefaultReporter.new(reporter_options)]
14
+
15
+ #VCR config
16
+ VCR.configure do |c|
17
+ c.cassette_library_dir = 'spec/fixtures/mediawiki_keiki_cassettes'
18
+ c.hook_into :webmock
19
+ end
metadata ADDED
@@ -0,0 +1,201 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mediawiki-keiki
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Evan Waters
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-12-11 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: httparty
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.13.3
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.13.3
30
+ - !ruby/object:Gem::Dependency
31
+ name: wikicloth
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 0.8.1
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 0.8.1
46
+ - !ruby/object:Gem::Dependency
47
+ name: nokogiri
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 1.6.5
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 1.6.5
62
+ - !ruby/object:Gem::Dependency
63
+ name: webmock
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: 1.20.4
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 1.20.4
78
+ - !ruby/object:Gem::Dependency
79
+ name: vcr
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: 2.9.3
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: 2.9.3
94
+ - !ruby/object:Gem::Dependency
95
+ name: turn
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ~>
100
+ - !ruby/object:Gem::Version
101
+ version: 0.9.7
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: 0.9.7
110
+ - !ruby/object:Gem::Dependency
111
+ name: rake
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: 10.4.2
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ~>
124
+ - !ruby/object:Gem::Version
125
+ version: 10.4.2
126
+ - !ruby/object:Gem::Dependency
127
+ name: pry
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ~>
132
+ - !ruby/object:Gem::Version
133
+ version: 0.10.1
134
+ type: :runtime
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ~>
140
+ - !ruby/object:Gem::Version
141
+ version: 0.10.1
142
+ - !ruby/object:Gem::Dependency
143
+ name: minitest-reporters
144
+ requirement: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ~>
148
+ - !ruby/object:Gem::Version
149
+ version: 0.14.24
150
+ type: :runtime
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ~>
156
+ - !ruby/object:Gem::Version
157
+ version: 0.14.24
158
+ description: A client for the MediaWiki API
159
+ email:
160
+ - evan.waters@gmail.com
161
+ executables: []
162
+ extensions: []
163
+ extra_rdoc_files:
164
+ - README.md
165
+ files:
166
+ - Rakefile
167
+ - LICENSE
168
+ - README.md
169
+ - Gemfile
170
+ - lib/mediawiki-keiki.rb
171
+ - lib/mediawiki-keiki/page.rb
172
+ - lib/mediawiki-keiki/query.rb
173
+ - spec/spec_helper.rb
174
+ - spec/lib/mediawiki-keiki/page_spec.rb
175
+ - spec/lib/mediawiki-keiki/query_spec.rb
176
+ homepage: https://github.com/mkulumadzi/mediawiki-keiki
177
+ licenses:
178
+ - MIT
179
+ post_install_message:
180
+ rdoc_options: []
181
+ require_paths:
182
+ - lib
183
+ required_ruby_version: !ruby/object:Gem::Requirement
184
+ none: false
185
+ requirements:
186
+ - - ! '>='
187
+ - !ruby/object:Gem::Version
188
+ version: '0'
189
+ required_rubygems_version: !ruby/object:Gem::Requirement
190
+ none: false
191
+ requirements:
192
+ - - ! '>='
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
195
+ requirements: []
196
+ rubyforge_project:
197
+ rubygems_version: 1.8.23
198
+ signing_key:
199
+ specification_version: 3
200
+ summary: MediaWiki Keiki
201
+ test_files: []