wikio 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/bin/wikireducer +48 -0
  3. data/lib/wikio.rb +41 -0
  4. metadata +46 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 346b20f9174cb1d24c10850f5688fd6d916ddcbf
4
+ data.tar.gz: bdbc0f5e08ff93d15e570237ce9ded592b76b067
5
+ SHA512:
6
+ metadata.gz: 39fdd792e2f4c8f9b9dd3242d4ae896606fe8203e9eaaed8c8caea6172f073f91233d44d988c3219d89c813eed26a28b44a7d0a0df9ed9f8418590a06dcd43d6
7
+ data.tar.gz: 47cbea5777dec4da685ffd82462fbb72b75ab76967cd0ef3a257fc5d168c7e5c7091fd10eb408dffcf033c8c9407a93f2dc195b54fd8d834fb9fd4b49ada198c
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'set'
5
+
6
+ require 'wikio'
7
+
8
# Follow the chain of first links starting at one Wikipedia page until the
# destination is reached, a page is revisited, or a page yields no link.
#
# The outcome is printed to stdout; each page visited along the way is
# reported on stderr.
#
# @param src [String] URL of the Wikipedia page to start from
# @param dst [String] URL of the Wikipedia page to look for
# @return [nil]
def walk(src, dst)
  visited = Set.new([src])
  current = src
  count = 0

  loop do
    if current == dst
      puts "#{src} -> #{dst} in #{count} steps"
      break
    end
    $stderr.puts "#{src} -> #{current}"
    current = Wikio.get_first_link(current)
    # A page without a usable link ends the chain. Anything that is not a
    # URL string (nil, an empty node list, ...) must not be fed back into
    # the next fetch, where it would raise.
    unless current.is_a?(String)
      puts "Dead end for #{src} after #{count} steps: no article link found"
      break
    end
    # Set#add? returns nil when the element is already present.
    unless visited.add?(current)
      puts "Cycle detected for #{src} at node #{current}"
      break
    end
    count += 1
  end
end
31
+
32
# Command-line entry point: resolve the destination and every source term to
# a Wikipedia URL, then walk each source concurrently (one thread per source).

# :dst holds the destination *search term*; it is resolved to a URL only
# after parsing, so the default goes through the same lookup as --dst.
options = { dst: 'Philosophy' }
OptionParser.new do |opts|
  opts.banner = 'Usage: wikireducer [--dst=<destination_article>] <src1> <src2> ...'

  opts.on('-d', '--dst DESTINATION', 'Destination article') do |dst|
    options[:dst] = dst
  end
end.parse!

destination = Wikio.get_wiki_url(options[:dst])
abort "No Wikipedia article found for destination #{options[:dst]}" unless destination

puts "Searching for #{destination}"

# Resolve every source first; terms without a search hit are reported and
# skipped instead of crashing inside a worker thread.
sources = ARGV.map do |term|
  Wikio.get_wiki_url(term).tap do |url|
    warn "No Wikipedia article found for #{term}; skipping" unless url
  end
end.compact

threads = sources.map do |url|
  Thread.new { walk(url, destination) }
end
threads.each(&:join)
@@ -0,0 +1,41 @@
1
+ require 'json'
2
+ require 'net/http'
3
+ require 'nokogiri'
4
+ require 'uri'
5
+
6
+ # Resources
7
+ # - https://stackoverflow.com/questions/27457977/searching-wikipedia-using-api
8
+
9
# Helpers for looking up English Wikipedia articles and extracting the first
# article link from a page.
module Wikio
  # Endpoint queried for title searches.
  WIKIPEDIA_API_URL = 'https://en.wikipedia.org/w/api.php'.freeze
  # Prefix prepended to the site-relative hrefs found in article bodies.
  WIKIPEDIA_DOMAIN = 'https://en.wikipedia.org'.freeze
  # Anchors that are direct children of a paragraph in the parser output, or
  # of an italic element directly inside such a paragraph.
  FIRST_LINK_XPATH = [
    "//div[contains(@class, 'mw-parser-output')]/p/a",
    "//div[contains(@class, 'mw-parser-output')]/p/i/a"
  ].join(' | ').freeze

  # Resolve a search term to the URL of the best-matching article using the
  # `opensearch` API action (main namespace only, a single result).
  #
  # @param term [String] free-text search term
  # @return [String, nil] URL of the first hit, or nil when the search has no
  #   result or the API answers with something other than a result array
  # @raise [JSON::ParserError] if the response body is not valid JSON
  def self.get_wiki_url(term)
    params = {
      action: 'opensearch',
      search: term,
      limit: 1,
      namespace: 0,
      format: 'json'
    }
    uri = URI(WIKIPEDIA_API_URL)
    uri.query = URI.encode_www_form(params)
    payload = JSON.parse(Net::HTTP.get(uri))
    # The URL list is read from index 3 of the response array. An error
    # payload is a JSON object rather than an array, so guard before digging.
    payload.is_a?(Array) ? payload.dig(3, 0) : nil
  end

  # Fetch a Wikipedia page and return the absolute URL of the first
  # paragraph-level link that points at another wiki page.
  #
  # @param wiki_url [String] absolute URL of the page to fetch
  # @return [String, nil] absolute URL of the first `/wiki/...` link, or nil
  #   when the page contains no such link
  def self.get_first_link(wiki_url)
    body = Net::HTTP.get(URI(wiki_url))
    doc = Nokogiri::HTML(body)
    # Skip anchors without an href as well as in-page (#...) and external links.
    href = doc.xpath(FIRST_LINK_XPATH)
              .map { |node| node.attr('href').to_s }
              .find { |candidate| candidate.start_with?('/wiki/') }
    href && "#{WIKIPEDIA_DOMAIN}#{href}"
  end
end
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wikio
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Daniel Cardoza
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-01-30 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Library containing helper functions for wikipedia
14
+ email: danielpcardoza@gmail.com
15
+ executables:
16
+ - wikireducer
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - bin/wikireducer
21
+ - lib/wikio.rb
22
+ homepage: http://rubygems.org/gems/wikio
23
+ licenses:
24
+ - MIT
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 2.6.11
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: Library for extracting information from wikipedia pages
46
+ test_files: []