wikipedia-client 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/MIT-LICENSE +20 -0
- data/README.textile +81 -0
- data/Rakefile +60 -0
- data/VERSION +1 -0
- data/init.rb +1 -0
- data/install.rb +1 -0
- data/lib/wikipedia.rb +37 -0
- data/lib/wikipedia/client.rb +91 -0
- data/lib/wikipedia/configuration.rb +25 -0
- data/lib/wikipedia/page.rb +109 -0
- data/lib/wikipedia/url.rb +14 -0
- data/script/add_sanitization_test +22 -0
- data/spec/fixtures/Edsger_Dijkstra.json +1 -0
- data/spec/fixtures/Edsger_Dijkstra.yaml +184 -0
- data/spec/fixtures/Edsger_Dijkstra_section_0.json +1 -0
- data/spec/fixtures/Edsger_content.txt +1 -0
- data/spec/fixtures/File_Edsger_Wybe_Dijkstra_jpg.json +1 -0
- data/spec/fixtures/sanitization_samples/Ceawlin_of_Wessex-raw.txt +19 -0
- data/spec/fixtures/sanitization_samples/Ceawlin_of_Wessex-sanitized.txt +3 -0
- data/spec/fixtures/sanitization_samples/Edsger_W_Dijkstra-raw.txt +26 -0
- data/spec/fixtures/sanitization_samples/Edsger_W_Dijkstra-sanitized.txt +2 -0
- data/spec/fixtures/sanitization_samples/Flower_video_game-raw.txt +25 -0
- data/spec/fixtures/sanitization_samples/Flower_video_game-sanitized.txt +2 -0
- data/spec/fixtures/sanitization_samples/How_to_Lose_Friends__Alienate_People_film-raw.txt +28 -0
- data/spec/fixtures/sanitization_samples/How_to_Lose_Friends__Alienate_People_film-sanitized.txt +2 -0
- data/spec/fixtures/sanitization_samples/Kirsten_Dunst-raw.txt +16 -0
- data/spec/fixtures/sanitization_samples/Kirsten_Dunst-sanitized.txt +3 -0
- data/spec/fixtures/sanitization_samples/Large_Hadron_Collider-raw.txt +104 -0
- data/spec/fixtures/sanitization_samples/Large_Hadron_Collider-sanitized.txt +4 -0
- data/spec/fixtures/sanitization_samples/Metro_Goldwyn_Mayer-raw.txt +18 -0
- data/spec/fixtures/sanitization_samples/Metro_Goldwyn_Mayer-sanitized.txt +1 -0
- data/spec/fixtures/sanitization_samples/Middle_Ages-raw.txt +10 -0
- data/spec/fixtures/sanitization_samples/Middle_Ages-sanitized.txt +3 -0
- data/spec/fixtures/sanitization_samples/SMS_Elbing-raw.txt +51 -0
- data/spec/fixtures/sanitization_samples/SMS_Elbing-sanitized.txt +1 -0
- data/spec/fixtures/sanitization_samples/Sashimi-raw.txt +16 -0
- data/spec/fixtures/sanitization_samples/Sashimi-sanitized.txt +7 -0
- data/spec/fixtures/sanitization_samples/Superb_Fairywren-raw.txt +35 -0
- data/spec/fixtures/sanitization_samples/Superb_Fairywren-sanitized.txt +3 -0
- data/spec/fixtures/sanitization_samples/Velociraptor-raw.txt +28 -0
- data/spec/fixtures/sanitization_samples/Velociraptor-sanitized.txt +3 -0
- data/spec/lib/client_spec.rb +108 -0
- data/spec/lib/sanitize_spec.rb +14 -0
- data/spec/lib/url_spec.rb +8 -0
- data/spec/lib/wikipedia_spec.rb +20 -0
- data/spec/spec_helper.rb +4 -0
- data/tasks/wikipedia_tasks.rake +4 -0
- data/uninstall.rb +1 -0
- data/wikipedia-client.gemspec +96 -0
- metadata +134 -0
    
        data/MIT-LICENSE
    ADDED
    
    | @@ -0,0 +1,20 @@ | |
| 1 | 
            +
            Copyright (c) 2008 [name of plugin creator]
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            Permission is hereby granted, free of charge, to any person obtaining
         | 
| 4 | 
            +
            a copy of this software and associated documentation files (the
         | 
| 5 | 
            +
            "Software"), to deal in the Software without restriction, including
         | 
| 6 | 
            +
            without limitation the rights to use, copy, modify, merge, publish,
         | 
| 7 | 
            +
            distribute, sublicense, and/or sell copies of the Software, and to
         | 
| 8 | 
            +
            permit persons to whom the Software is furnished to do so, subject to
         | 
| 9 | 
            +
            the following conditions:
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            The above copyright notice and this permission notice shall be
         | 
| 12 | 
            +
            included in all copies or substantial portions of the Software.
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
         | 
| 15 | 
            +
            EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
         | 
| 16 | 
            +
            MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
         | 
| 17 | 
            +
            NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
         | 
| 18 | 
            +
            LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
         | 
| 19 | 
            +
            OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
         | 
| 20 | 
            +
            WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
         | 
    
        data/README.textile
    ADDED
    
    | @@ -0,0 +1,81 @@ | |
| 1 | 
            +
            h1. Wikipedia
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            Allows you to get Wikipedia content through its API. This uses the
         | 
| 4 | 
            +
            alpha API, not the deprecated query.php API type
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            Wikipedia API reference: "http://en.wikipedia.org/w/api.php":http://en.wikipedia.org/w/api.php
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            Adopted from: "http://code.google.com/p/wikipedia-client/":http://code.google.com/p/wikipedia-client/
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            h2. Examples
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            <pre><code>require 'wikipedia'
         | 
| 13 | 
            +
            page = Wikipedia.find( 'Getting Things Done' )
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            => #<Wikipedia::Page>
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            page.title
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            => 'Getting Things Done'
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            page.content
         | 
| 22 | 
            +
             | 
| 23 | 
            +
            => # all the wiki markup appears here...
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            page.categories
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            => [..., "Category:Self-help books", ...]
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            page.links
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            => [..., "Business", "Cult following", ...]
         | 
| 32 | 
            +
             | 
| 33 | 
            +
            page.images
         | 
| 34 | 
            +
             | 
| 35 | 
            +
            => ["File:Getting Things Done.jpg", ...]
         | 
| 36 | 
            +
             | 
| 37 | 
            +
            page.image_urls
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            => ["http://upload.wikimedia.org/wikipedia/en/e/e1/Getting_Things_Done.jpg", ...]</code></pre>
         | 
| 40 | 
            +
             | 
| 41 | 
            +
            h2. Configuration
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            This is by default configured like this:
         | 
| 44 | 
            +
             | 
| 45 | 
            +
            <pre><code>Wikipedia.Configure {
         | 
| 46 | 
            +
              domain 'en.wikipedia.org'
         | 
| 47 | 
            +
              path   'w/api.php'
         | 
| 48 | 
            +
            }</code></pre>
         | 
| 49 | 
            +
             | 
| 50 | 
            +
            h2. Advanced
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            See the API spec at "http://en.wikipedia.org/w/api.php":http://en.wikipedia.org/w/api.php
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            If you need data that is not already present, you can override
         | 
| 55 | 
            +
            parameters.
         | 
| 56 | 
            +
             | 
| 57 | 
            +
            For example, to retrieve only the page info:
         | 
| 58 | 
            +
             | 
| 59 | 
            +
            <pre><code>page = Wikipedia.find( 'Getting Things Done', :prop => "info" )
         | 
| 60 | 
            +
             | 
| 61 | 
            +
            page.title
         | 
| 62 | 
            +
             | 
| 63 | 
            +
            => "Getting Things Done"
         | 
| 64 | 
            +
             | 
| 65 | 
            +
            page.raw_data
         | 
| 66 | 
            +
             | 
| 67 | 
            +
            => {"query"=>{"pages"=>{"959928"=>{"pageid"=>959928, "ns"=>0,
         | 
| 68 | 
            +
            "title"=>"Getting Things Done", "touched"=>"2010-03-10T00:04:09Z",
         | 
| 69 | 
            +
            "lastrevid"=>348481810, "counter"=>0, "length"=>7891}}}}</code></pre>
         | 
| 70 | 
            +
             | 
| 71 | 
            +
            h2. Running specs
         | 
| 72 | 
            +
             | 
| 73 | 
            +
            If you have rspec >= 1.1.3 installed, just type in
         | 
| 74 | 
            +
             | 
| 75 | 
            +
            rake spec
         | 
| 76 | 
            +
             | 
| 77 | 
            +
            h2. Thanks!
         | 
| 78 | 
            +
             | 
| 79 | 
            +
            Copyright (c) 2008 [Cyril David], released under the MIT license
         | 
| 80 | 
            +
             | 
| 81 | 
            +
            Adopted by Ken Pratt (ken@kenpratt.net) in 2010/03
         | 
    
        data/Rakefile
    ADDED
    
    | @@ -0,0 +1,60 @@ | |
| 1 | 
            +
            require 'rubygems'
         | 
| 2 | 
            +
            require 'rake'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Gem packaging tasks come from Jeweler. When Jeweler cannot be loaded a
            # hint is printed and the rest of the Rakefile stays usable.
            begin
              require 'jeweler'
              Jeweler::Tasks.new do |gem|
                gem.name = "wikipedia-client"
                gem.summary = %Q{Ruby client for the Wikipedia API}
                gem.description = %Q{Ruby client for the Wikipedia API}
                gem.email = "christian.hellsten@gmail.com"
                gem.homepage = "http://github.com/christianhellsten/wikipedia-client"
                gem.authors = ["Cyril David", "Ken Pratt"]
                gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
                # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
              end
              Jeweler::GemcutterTasks.new
            rescue LoadError
              puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
            end

            require 'rake/testtask'
            Rake::TestTask.new(:test) do |test|
              test.libs << 'lib' << 'test'
              test.pattern = 'test/**/test_*.rb'
              test.verbose = true
            end


            desc 'Test the wikipedia plugin.'
            task :spec do
              # Expand the glob in Ruby instead of relying on the shell's handling of
              # "**", and only hand real spec files to the runner.
              spec_files = Dir[File.join(File.dirname(__FILE__), 'spec', '**', '*_spec.rb')].sort
              # `sh` fails the task when the runner exits non-zero; a bare `system`
              # call would let failing specs go unnoticed.
              sh 'spec', '-cfs', *spec_files
            end

            begin
              require 'rcov/rcovtask'
              Rcov::RcovTask.new do |test|
                test.libs << 'test'
                test.pattern = 'test/**/test_*.rb'
                test.verbose = true
              end
            rescue LoadError
              task :rcov do
                abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
              end
            end

            # :check_dependencies is only defined when Jeweler loaded above; depending
            # on it unconditionally would make `rake spec` fail with an unknown-task
            # error on machines without Jeweler.
            task :spec => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

            task :default => :spec

            # Prefer the task class shipped with RDoc; fall back to the legacy Rake
            # location so older toolchains keep working.
            begin
              require 'rdoc/task'
              rdoc_task_class = RDoc::Task
            rescue LoadError
              require 'rake/rdoctask'
              rdoc_task_class = Rake::RDocTask
            end

            rdoc_task_class.new do |rdoc|
              # Strip the trailing newline so it does not end up in the title.
              version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

              rdoc.rdoc_dir = 'rdoc'
              rdoc.title = "wikipedia-client #{version}"
              rdoc.rdoc_files.include('README*')
              rdoc.rdoc_files.include('lib/**/*.rb')
            end
         | 
    
        data/VERSION
    ADDED
    
    | @@ -0,0 +1 @@ | |
| 1 | 
            +
            1.0.0
         | 
    
        data/init.rb
    ADDED
    
    | @@ -0,0 +1 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__) + '/lib/wikipedia'
         | 
    
        data/install.rb
    ADDED
    
    | @@ -0,0 +1 @@ | |
| 1 | 
            +
            # Show the README after installation. The package ships README.textile
            # (there is no plain README file), so read that file.
            puts File.read(File.dirname(__FILE__) + '/README.textile')
         | 
    
        data/lib/wikipedia.rb
    ADDED
    
    | @@ -0,0 +1,37 @@ | |
| 1 | 
            +
            Dir[File.dirname(__FILE__) + '/wikipedia/**/*.rb'].each { |f| require f }
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'uri'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            module Wikipedia
              # Module-level convenience API backed by one lazily created, shared
              # Wikipedia::Client.
              #
              #   page = Wikipedia.find('Rails')
              #   page.content
              #   # => wiki content appears here
              #
              # which is shorthand for
              #
              #   client = Wikipedia::Client.new
              #   client.find('Rails')
              class << self
                # Evaluates the block in the context of the Configuration singleton,
                # so directives can be written bare inside it.
                def Configure(&block)
                  Configuration.instance.instance_eval(&block)
                end

                # Looks up a page through the shared client.
                def find( page, options = {} )
                  client.find( page, options )
                end

                # Looks up an image/file page through the shared client.
                def find_image( title, options = {} )
                  client.find_image( title, options )
                end

                # Shared client, created on first use. This method is publicly
                # callable: a bare `private` keyword does not apply to methods
                # defined on the module itself.
                def client
                  @client ||= Wikipedia::Client.new
                end
              end

              # Default configuration, applied when the module is loaded.
              Configure {
                domain 'en.wikipedia.org'
                path   'w/api.php'
              }
            end
         | 
| @@ -0,0 +1,91 @@ | |
| 1 | 
            +
            module Wikipedia
              # Builds query URLs for the configured API endpoint, fetches them and
              # wraps the JSON responses in Wikipedia::Page objects.
              class Client
                # see http://en.wikipedia.org/w/api.php
                # Tokens of the form ":name" are placeholders filled in by #url_for.
                BASE_URL = "http://:domain/:path?action=:action&format=json"

                # Upper bound on redirect hops followed by #find, so that a circular
                # redirect chain cannot loop forever.
                MAX_REDIRECTS = 10

                # When true (the default), #find follows "#REDIRECT [[...]]" pages.
                attr_accessor :follow_redirects

                def initialize
                  self.follow_redirects = true
                end

                # Fetches the page for +title+; extra +options+ are merged into the
                # request parameters. If Url cannot extract a title from the argument
                # and raises, the argument is used unchanged.
                # Follows up to MAX_REDIRECTS redirects when follow_redirects is set,
                # then returns the last page fetched.
                def find( title, options = {} )
                  title = Url.new(title).title rescue title
                  page = Page.new( request_page( title, options ) )
                  hops = 0
                  while follow_redirects and hops < MAX_REDIRECTS and page.redirect?
                    page = Page.new( request_page( page.redirect_title, options ))
                    hops += 1
                  end
                  page
                end

                # Fetches image info for +title+ and returns it as a Page.
                def find_image( title, options = {} )
                  title = Url.new(title).title rescue title
                  Page.new( request_image( title, options ) )
                end

                # http://en.wikipedia.org/w/api.php?action=query&format=json&prop=revisions%7Clinks%7Cimages%7Ccategories&rvprop=content&titles=Flower%20(video%20game)
                # Returns the raw response body; +options+ override the defaults.
                def request_page( title, options = {} )
                  request( {
                             :action => "query",
                             :prop => %w{ revisions links images categories },
                             :rvprop => "content",
                             :titles => title
                           }.merge( options ) )
                end

                # http://en.wikipedia.org/w/api.php?action=query&format=json&prop=imageinfo&iiprop=url&titles=File:Flower.png
                # Returns the raw response body; +options+ override the defaults.
                def request_image( title, options = {} )
                  request( {
                             :action => "query",
                             :prop => "imageinfo",
                             :iiprop => "url",
                             :titles => title
                           }.merge( options ) )
                end

                # Performs the HTTP GET for the URL built from +options+ and returns
                # the response body.
                def request( options )
                  require 'open-uri'
                  URI.parse( url_for( options ) ).read( "User-Agent" => "Ruby/#{RUBY_VERSION}" )
                end

                protected
                  # Endpoint settings taken from Wikipedia::Configuration.
                  def configuration_options
                    {
                      :domain => Configuration[:domain],
                      :path   => Configuration[:path]
                    }
                  end

                  # Builds the request URL: options whose name is a placeholder in
                  # BASE_URL are substituted, all others are appended as parameters.
                  def url_for( options )
                    url = BASE_URL.dup
                    options = configuration_options.merge( options )
                    options.each do |key, val|
                      value = urlify_value( val )
                      placeholder = ":#{key}"
                      # Check the template, not the URL built so far: an already
                      # appended value (e.g. a title containing ":prop") must not be
                      # mistaken for a placeholder.
                      if BASE_URL.include?( placeholder )
                        # Block form inserts the value literally (no "\1"-style
                        # backreference expansion in the replacement).
                        url.sub!( placeholder ) { value.to_s }
                      else
                        url << "&#{key}=#{value}"
                      end
                    end
                    url
                  end

                  # Arrays are flattened and joined with "|" before encoding; any
                  # other value is passed to #encode directly.
                  def urlify_value( val )
                    case val
                    when Array
                      encode( val.flatten.join( '|' ) )
                    else
                      encode( val )
                    end
                  end

                  # Percent-encodes strings for use as a query value; other values
                  # are returned untouched. "&" and "+" are escaped explicitly because
                  # the generic URI escaping leaves them alone, and an unescaped "+"
                  # is read as a space in a query string.
                  def encode( val )
                    case val
                    when String
                      uri_escaper.escape( val ).gsub( '&', '%26' ).gsub( '+', '%2B' )
                    else
                      val
                    end
                  end

                  # Object providing #escape with the semantics of the former
                  # URI.encode, which is no longer available on current Ruby versions.
                  # Resolved lazily because this file may be loaded before 'uri'.
                  def uri_escaper
                    @uri_escaper ||=
                      if defined?( URI::RFC2396_Parser )
                        URI::RFC2396_Parser.new
                      elsif defined?( URI::DEFAULT_PARSER )
                        URI::DEFAULT_PARSER
                      else
                        URI
                      end
                  end
              end
            end
         | 
| @@ -0,0 +1,25 @@ | |
| 1 | 
            +
            require 'singleton'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Wikipedia
              # Singleton holding the client settings. Each directive is one method
              # that reads the setting when called without arguments and stores its
              # first argument otherwise.
              class Configuration
                include Singleton

                class << self
                  # Defines a combined reader/writer instance method for every name
                  # given.
                  def directives(*names)
                    names.each do |name|
                      ivar = "@#{name}"
                      define_method(name) do |*args|
                        args.empty? ? instance_variable_get(ivar) : instance_variable_set(ivar, args.first)
                      end
                    end
                  end

                  # Reads a setting from the singleton, e.g. Configuration[:domain].
                  def [](directive)
                    instance.send(directive)
                  end
                end

                directives :domain, :path
              end
            end
         | 
| @@ -0,0 +1,109 @@ | |
| 1 | 
            +
            module Wikipedia
              # Wraps one JSON response of the query API and exposes the fields of
              # the first page it contains.
              class Page
                # json:: the raw JSON response body.
                def initialize(json)
                  require 'json'
                  @json = json
                  # NOTE(review): JSON.load is used on data received from the network;
                  # JSON.parse is the stricter choice for untrusted input. Left as is
                  # because the two differ in accepted inputs (e.g. nil) - confirm
                  # before switching.
                  @data = JSON::load(json)
                end

                # Hash of the first page listed under query/pages.
                def page
                  @data['query']['pages'].values.first
                end

                # Wiki markup of the first revision, or nil when the response has no
                # revisions.
                def content
                  revisions = page['revisions']
                  revision = revisions && revisions.first
                  return nil unless revision
                  # The markup is stored under the "*" key. Look it up by name, since
                  # the revision hash may carry other keys ahead of it; fall back to
                  # the first value for responses without a "*" key.
                  revision.key?('*') ? revision['*'] : revision.values.first
                end

                # #content run through Page.sanitize.
                def sanitized_content
                  self.class.sanitize(content)
                end

                # MatchData for a "#REDIRECT [[Target]]" directive in the content
                # (capture 1 is the target), or nil/false when there is none.
                def redirect?
                  content && content.match(/\#REDIRECT\s+\[\[(.*?)\]\]/i)
                end

                # Title the page redirects to, or nil when it is not a redirect.
                def redirect_title
                  if matches = redirect?
                    matches[1]
                  end
                end

                def title
                  page['title']
                end

                # Titles of the page's categories, or nil when none were returned.
                def categories
                  page['categories'].map {|c| c['title'] } if page['categories']
                end

                # Titles of the pages linked from this page, or nil when none were
                # returned.
                def links
                  page['links'].map {|c| c['title'] } if page['links']
                end

                # Titles of the images used on the page, or nil when none were
                # returned.
                def images
                  page['images'].map {|c| c['title'] } if page['images']
                end

                # URL of the first imageinfo entry (present on image lookups).
                def image_url
                  page['imageinfo'].first['url'] if page['imageinfo']
                end

                # URLs of the page's jpg/jpeg/png/gif images. Performs one
                # Wikipedia.find_image lookup per image; returns nil when the page
                # lists no images.
                def image_urls
                  if list = images
                    filtered = list.select {|i| i =~ /^file:.+\.(jpg|jpeg|png|gif)$/i && !i.include?("LinkFA-star") }
                    filtered.map do |title|
                      Wikipedia.find_image( title ).image_url
                    end
                  end
                end

                # The parsed response.
                def raw_data
                  @data
                end

                # The unparsed response body.
                def json
                  @json
                end

                # Reduces wiki markup to light HTML: removes templates, the first
                # table, images, references and comments, unwraps internal links,
                # converts bold/italic quotes to <b>/<i>, and wraps blank-line
                # separated sections in <p> tags when there is more than one.
                # Returns nil for nil input; the argument itself is not modified.
                def self.sanitize( s )
                  if s
                    s = s.dup

                    # strip anything inside curly braces!
                    # Repeated so that nested templates are removed from the inside
                    # out; gsub! returns nil once nothing matches any more.
                    nil while s.gsub!(/\{\{[^\{\}]+?\}\}/, '')

                    # strip info box
                    s.sub!(/^\{\|[^\{\}]+?\n\|\}\n/, '')

                    # strip internal links
                    s.gsub!(/\[\[([^\]\|]+?)\|([^\]\|]+?)\]\]/, '\2')
                    s.gsub!(/\[\[([^\]\|]+?)\]\]/, '\1')

                    # strip images and file links
                    s.gsub!(/\[\[Image:[^\[\]]+?\]\]/, '')
                    s.gsub!(/\[\[File:[^\[\]]+?\]\]/, '')

                    # convert bold/italic to html
                    s.gsub!(/'''''(.+?)'''''/, '<b><i>\1</i></b>')
                    s.gsub!(/'''(.+?)'''/, '<b>\1</b>')
                    s.gsub!(/''(.+?)''/, '<i>\1</i>')

                    # misc
                    s.gsub!(/<ref[^<>]*>[\s\S]*?<\/ref>/, '')
                    s.gsub!(/<!--[^>]+?-->/, '')
                    s.gsub!('  ', ' ')
                    s.strip!

                    # create paragraphs
                    sections = s.split("\n\n")
                    if sections.size > 1
                      # Distinct block variable so the outer string is not shadowed.
                      s = sections.map {|section| "<p>#{section.strip}</p>" }.join("\n")
                    end

                    s
                  end
                end
              end
            end
         |