mediawiki-keiki 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,16 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ # gem 'httparty'
6
+ # gem 'wikicloth'
7
+ # gem 'nokogiri'
8
+
9
+ # group :test do
10
+ # gem 'webmock'
11
+ # gem 'vcr'
12
+ # gem 'turn'
13
+ # gem 'rake'
14
+ # gem 'pry'
15
+ # gem 'minitest-reporters'
16
+ # end
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2010-2014 Google, Inc. http://angularjs.org
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,14 @@
1
+ # MediaWiki-Keiki
2
+
3
+ A Ruby API client for the [MediaWiki API](http://www.mediawiki.org/wiki/API:Main_page)
4
+
5
+ ## Features
6
+
7
+
8
+ ## Links
9
+
10
+
11
+ ## Installation
12
+
13
+
14
+ ## Usage
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require 'rake/testtask'
2
+
3
# Defines the test task; it runs every *_spec.rb file found under
# spec/lib/mediawiki-keiki and prints verbose output.
Rake::TestTask.new do |test_task|
  test_task.verbose = true
  test_task.test_files = FileList['spec/lib/mediawiki-keiki/*_spec.rb']
end

# Running rake without arguments runs the test task.
task(:default => :test)
@@ -0,0 +1,48 @@
1
module MediaWiki

  # Wraps the hash that describes a single page and exposes its keys as
  # reader methods, plus helpers that convert the page content to HTML,
  # plain text and a short summary.
  class Page

    attr_reader :page_data

    # hash - the page data; its keys are looked up by String name.
    def initialize(hash)
      @page_data = hash
    end

    # Returns the value stored under the method's name when the page hash has
    # a matching key; otherwise falls through to the default behaviour, which
    # raises NoMethodError.
    def method_missing(name, *args, &block)
      key = name.to_s
      @page_data.has_key?(key) ? @page_data[key] : super
    end

    # Keeps respond_to? consistent with method_missing: a page responds to
    # every key present in its hash.
    def respond_to_missing?(name, include_private = false)
      @page_data.has_key?(name.to_s) || super
    end

    # Gets the content by looking inside the 'revisions' key in the page hash.
    # Raises NoMethodError when the page hash has no 'revisions' key.
    def content
      revisions[0]["*"]
    end

    # Uses the WikiCloth gem to convert the content from WikiMarkup to HTML
    def to_html
      WikiCloth::Parser.new( :data => content ).to_html
    end

    # Converts the content to plain text by extracting the text of the HTML
    def to_text
      Nokogiri::HTML(to_html).text
    end

    # Returns a short summary: the first line of the plain text, extended one
    # line at a time until it is longer than min_length characters or the
    # text runs out. The result can therefore be shorter than min_length when
    # the whole text is short, and is an empty string when there is no text.
    #
    # min_length - the length the summary has to exceed before lines stop
    #              being added (default: 140).
    def summary(min_length = 140)
      lines = to_text.split("\n")

      # An empty text splits into an empty array; there is nothing to summarize
      return '' if lines.empty?

      text = lines.shift

      lines.each do |line|
        break if text.length > min_length
        text << "\n" << line
      end

      text
    end

  end

end
@@ -0,0 +1,56 @@
1
module MediaWiki

  # Runs a titles query against the API endpoint configured below and wraps
  # every page of the response in a MediaWiki::Page.
  class Query

    include HTTParty

    # The reader for query_result is defined explicitly below, so only the
    # writer is generated here.
    attr_accessor :query
    attr_writer :query_result

    # Sets the User-Agent header for the HTTP GET request
    headers 'User-Agent' => 'crawler/1.0 (https://github.com/mkulumadzi/crawler)'

    # Sets the base-uri to the Wikimedia API endpoint
    base_uri 'https://en.wikipedia.org'

    # Highest number of titles accepted in a single query
    MAX_TITLES = 50

    # query - the page titles to look up, separated by "|".
    #
    # Raises ArgumentError when the query names more than MAX_TITLES titles.
    def initialize(query)

      # A query of n titles contains n - 1 separators
      if query.count("|") >= MAX_TITLES
        raise ArgumentError, "Query exceeds WikiMedia maximum number of sites (50)"
      end

      @query = query
      @page_hash = Hash.new

    end

    # Unless force is true, uses existing query_result if it has already been cached.
    # A failed forced refresh leaves the previously cached result in place.
    def query_result(force = false)
      return @query_result = get_query_result if force

      @query_result ||= get_query_result
    end

    # Returns a hash filled with Pages, keyed by the original query terms.
    #
    # Each term is followed through the response's 'normalized' and
    # 'redirects' lists to the title of the page it resolved to, and paired
    # with the page carrying that title. Terms that cannot be matched to a
    # page in the response are left out of the hash.
    def pages

      response = query_result["query"]
      normalized = title_map(response["normalized"])
      redirects = title_map(response["redirects"])

      # Index the returned pages by their title
      pages_by_title = {}
      response["pages"].each_value { |data| pages_by_title[data["title"]] = data }

      # Creates a hash, using the original query terms (sorted) as the keys and
      # new Page objects as the values
      @page_hash = {}
      @query.split('|').sort.each do |term|
        data = pages_by_title[resolve_title(term, normalized, redirects)]
        @page_hash[term] = MediaWiki::Page.new(data) if data
      end

      @page_hash
    end

    private

    # Private method that gets called if the query_result has not been retrieved yet
    def get_query_result
      self.class.get "/w/api.php?#{URI.encode_www_form(request_params)}"
    end

    # Query string parameters, in request order. A nil value is emitted as a
    # bare key ("redirects"); every value is form-encoded, so titles that
    # contain characters such as "&" or "=" cannot corrupt the query string.
    def request_params
      [
        ['continue', ''],
        ['format', 'json'],
        ['action', 'query'],
        ['titles', @query],
        ['prop', 'revisions'],
        ['rvprop', 'content'],
        ['redirects', nil]
      ]
    end

    # Turns a list of { 'from' => ..., 'to' => ... } entries into a from => to
    # hash; a missing list yields an empty hash.
    def title_map(entries)
      map = {}
      (entries || []).each { |entry| map[entry["from"]] = entry["to"] }
      map
    end

    # Follows a query term through normalization and any chain of redirects to
    # the final title. Titles already visited are tracked so a redirect loop
    # cannot spin forever.
    def resolve_title(term, normalized, redirects)
      title = normalized.fetch(term, term)
      visited = {}

      while redirects.has_key?(title) && !visited[title]
        visited[title] = true
        title = redirects[title]
      end

      title
    end

  end

end
@@ -0,0 +1,7 @@
1
+ require 'httparty'
2
+ require 'wikicloth'
3
+ require 'nokogiri'
4
+
5
# Requires every Ruby file in the mediawiki-keiki directory next to this file.
# The list is sorted so the load order is the same on every platform.
Dir[File.join(File.dirname(__FILE__), 'mediawiki-keiki', '*.rb')].sort.each do |file|
  require file
end
@@ -0,0 +1,84 @@
1
+ require_relative '../../spec_helper'
2
+
3
# Specs for MediaWiki::Page; each page is obtained through MediaWiki::Query
# while the 'page' VCR cassette is inserted.
describe MediaWiki::Page do

  describe "Retrieve a page" do

    let(:page) { MediaWiki::Query.new('foo').pages['foo'] }

    before do
      VCR.insert_cassette 'page', :record => :new_episodes
    end

    after do
      VCR.eject_cassette
    end

    describe "get valid page" do

      it "must get a valid page from WikiQuery pages" do
        page.must_be_instance_of MediaWiki::Page
      end

      it "must store the page data as a hash" do
        page.page_data.must_be_instance_of Hash
      end

      it "must return the page title" do
        page.title.must_equal "Foobar"
      end

      it "must raise method missing if attribute is not present" do
        lambda { page.foo_attribute }.must_raise NoMethodError
      end

      it "must not point to a redirect" do
        /REDIRECT/.match(page.content).must_be_nil
      end

      it "must return the page content" do
        page.content.must_be_instance_of String
      end

      it "must parse the page content as html" do
        page.to_html.must_include '<a href='
      end

      it "must parse the page content to plain text" do
        page.to_text.wont_include '<a href='
      end

      it "must return a summary of the text" do
        page.summary.must_be_instance_of String
      end

      it "must return at least 140 characters in the short summary" do
        page.summary.length.must_be :>=, 140
      end

    end

    describe "missing page" do

      let(:page) { MediaWiki::Query.new('Foolicious').pages['Foolicious'] }

      # The page hash carries a 'missing' key whose value is an empty string
      it "must be flagged as missing" do
        page.missing.must_equal ""
      end

    end

    describe "get invalid page" do

      let(:page) { MediaWiki::Query.new('Talk:').pages['Talk:'] }

      # The page hash carries an 'invalid' key whose value is an empty string
      it "must be flagged as invalid" do
        page.invalid.must_equal ""
      end

    end

  end

end
84
+
@@ -0,0 +1,135 @@
1
+ require_relative '../../spec_helper'
2
+
3
# Specs for MediaWiki::Query: class-level HTTParty configuration, instance
# attributes, and request behaviour replayed through the 'wiki_query' VCR cassette.
describe MediaWiki::Query do

  describe "default attributes" do

    it "must include httparty methods" do
      MediaWiki::Query.must_include HTTParty
    end

    it "must have the base url set to the Wikipedia API endpoint" do
      MediaWiki::Query.base_uri.must_equal 'https://en.wikipedia.org'
    end

    it "must have the User-Agent header" do
      MediaWiki::Query.headers["User-Agent"].must_equal "crawler/1.0 (https://github.com/mkulumadzi/crawler)"
    end

  end

  describe "default instance attributes" do

    let(:wiki_query) { MediaWiki::Query.new('foo') }

    it "must have a query" do
      wiki_query.must_respond_to :query
    end

    it "must have the right query" do
      wiki_query.query.must_equal 'foo'
    end

  end

  describe "GET site" do

    let(:wiki_query) { MediaWiki::Query.new('foo') }

    before do
      VCR.insert_cassette 'wiki_query', :record => :new_episodes
    end

    after do
      VCR.eject_cassette
    end

    # Makes no assertion; it only issues the request so that it is recorded
    it "records the fixture" do
      MediaWiki::Query.get('/w/api.php?continue=&format=json&action=query&titles=foo&prop=revisions&rvprop=content&redirects')
    end

    it "must have a query result method" do
      wiki_query.must_respond_to :query_result
    end

    it "must parse the api response from JSON to Hash" do
      wiki_query.query_result.must_be_instance_of Hash
    end

    it "must perform the request and get data" do
      wiki_query.query_result["batchcomplete"].must_equal ""
    end

    describe "dynamic attributes" do

      before do
        wiki_query.query_result
      end

      it "must return the pages as a hash" do
        wiki_query.pages.must_be_instance_of Hash
      end

      it "must use keys in pages that match the original search terms" do
        wiki_query.pages.keys[0].must_equal 'foo'
      end

      it "must store the pages as Site classes" do
        wiki_query.pages['foo'].must_be_instance_of MediaWiki::Page
      end

    end

    describe "caching" do

      # Fetch the query result first, then use Webmock to make every further
      # request to en.wikipedia.org time out
      before do
        wiki_query.query_result
        stub_request(:any, /en.wikipedia.org/).to_timeout
      end

      it "must cache the query result" do
        wiki_query.query_result.must_be_instance_of Hash
      end

      it "must refresh the profile if forced" do
        lambda { wiki_query.query_result(true) }.must_raise Timeout::Error
      end

    end

    describe "multiple sites" do

      let(:wiki_query) { MediaWiki::Query.new('foo|bar|camp') }

      it "must return all of the sites" do
        wiki_query.pages.length.must_equal 3
      end

      it "must tag the sites with the right query terms" do
        wiki_query.pages['foo'].title.must_equal 'Foobar'
        wiki_query.pages['bar'].title.must_equal 'Bar'
        wiki_query.pages['camp'].title.must_equal 'Camp'
      end

    end

    describe "give warning if WikiMedia API limit of 50 sites exceeded" do

      # 51 titles ("1|2|...|51"), one more than the limit
      let(:search_string) { (1..51).to_a.join('|') }

      it "must throw an error if search string has more than 50 sites" do
        assert_raises ArgumentError do
          MediaWiki::Query.new(search_string)
        end
      end

      it "must accept a search string with exactly 50 sites" do
        MediaWiki::Query.new((1..50).to_a.join('|')).must_be_instance_of MediaWiki::Query
      end

    end

  end

end
@@ -0,0 +1,19 @@
1
+ #Load the MediaWiki file
2
+ require_relative '../lib/mediawiki-keiki'
3
+
4
+ #dependencies
5
+ require 'minitest/autorun'
6
+ require 'webmock/minitest'
7
+ require 'vcr'
8
+ require 'pry'
9
+ require 'minitest/reporters'
10
+
11
#Minitest reporter: the default reporter with colored output
Minitest::Reporters.use! [
  Minitest::Reporters::DefaultReporter.new({ color: true})
]

#VCR config: hook into webmock and keep cassettes under spec/fixtures
VCR.configure do |config|
  config.hook_into :webmock
  config.cassette_library_dir = 'spec/fixtures/mediawiki_keiki_cassettes'
end
metadata ADDED
@@ -0,0 +1,201 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mediawiki-keiki
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Evan Waters
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-12-11 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: httparty
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.13.3
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.13.3
30
+ - !ruby/object:Gem::Dependency
31
+ name: wikicloth
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 0.8.1
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 0.8.1
46
+ - !ruby/object:Gem::Dependency
47
+ name: nokogiri
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 1.6.5
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 1.6.5
62
+ - !ruby/object:Gem::Dependency
63
+ name: webmock
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: 1.20.4
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 1.20.4
78
+ - !ruby/object:Gem::Dependency
79
+ name: vcr
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: 2.9.3
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: 2.9.3
94
+ - !ruby/object:Gem::Dependency
95
+ name: turn
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ~>
100
+ - !ruby/object:Gem::Version
101
+ version: 0.9.7
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: 0.9.7
110
+ - !ruby/object:Gem::Dependency
111
+ name: rake
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: 10.4.2
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ~>
124
+ - !ruby/object:Gem::Version
125
+ version: 10.4.2
126
+ - !ruby/object:Gem::Dependency
127
+ name: pry
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ~>
132
+ - !ruby/object:Gem::Version
133
+ version: 0.10.1
134
+ type: :runtime
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ~>
140
+ - !ruby/object:Gem::Version
141
+ version: 0.10.1
142
+ - !ruby/object:Gem::Dependency
143
+ name: minitest-reporters
144
+ requirement: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ~>
148
+ - !ruby/object:Gem::Version
149
+ version: 0.14.24
150
+ type: :runtime
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ~>
156
+ - !ruby/object:Gem::Version
157
+ version: 0.14.24
158
+ description: A client for the MediaWiki API
159
+ email:
160
+ - evan.waters@gmail.com
161
+ executables: []
162
+ extensions: []
163
+ extra_rdoc_files:
164
+ - README.md
165
+ files:
166
+ - Rakefile
167
+ - LICENSE
168
+ - README.md
169
+ - Gemfile
170
+ - lib/mediawiki-keiki.rb
171
+ - lib/mediawiki-keiki/page.rb
172
+ - lib/mediawiki-keiki/query.rb
173
+ - spec/spec_helper.rb
174
+ - spec/lib/mediawiki-keiki/page_spec.rb
175
+ - spec/lib/mediawiki-keiki/query_spec.rb
176
+ homepage: https://github.com/mkulumadzi/mediawiki-keiki
177
+ licenses:
178
+ - MIT
179
+ post_install_message:
180
+ rdoc_options: []
181
+ require_paths:
182
+ - lib
183
+ required_ruby_version: !ruby/object:Gem::Requirement
184
+ none: false
185
+ requirements:
186
+ - - ! '>='
187
+ - !ruby/object:Gem::Version
188
+ version: '0'
189
+ required_rubygems_version: !ruby/object:Gem::Requirement
190
+ none: false
191
+ requirements:
192
+ - - ! '>='
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
195
+ requirements: []
196
+ rubyforge_project:
197
+ rubygems_version: 1.8.23
198
+ signing_key:
199
+ specification_version: 3
200
+ summary: MediaWiki Keiki
201
+ test_files: []