wikio 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/bin/wikireducer +48 -0
  3. data/lib/wikio.rb +41 -0
  4. metadata +46 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 346b20f9174cb1d24c10850f5688fd6d916ddcbf
4
+ data.tar.gz: bdbc0f5e08ff93d15e570237ce9ded592b76b067
5
+ SHA512:
6
+ metadata.gz: 39fdd792e2f4c8f9b9dd3242d4ae896606fe8203e9eaaed8c8caea6172f073f91233d44d988c3219d89c813eed26a28b44a7d0a0df9ed9f8418590a06dcd43d6
7
+ data.tar.gz: 47cbea5777dec4da685ffd82462fbb72b75ab76967cd0ef3a257fc5d168c7e5c7091fd10eb408dffcf033c8c9407a93f2dc195b54fd8d834fb9fd4b49ada198c
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'set'
5
+
6
+ require 'wikio'
7
+
8
# Follow the chain of first links between Wikipedia pages, starting at +src+,
# until +dst+ is reached, a page is revisited, or no further link is found.
# The outcome is printed to standard output; every page visited along the way
# is traced to standard error.
#
# @param src [String] URL of the Wikipedia page to start from
# @param dst [String] URL of the Wikipedia page to stop at
# @return [nil]
def walk(src, dst)
  visited = Set.new([src])
  current = src
  count = 0

  loop do
    if current == dst
      puts "#{src} -> #{dst} in #{count} steps"
      break
    end

    $stderr.puts "#{src} -> #{current}"
    following = Wikio.get_first_link(current)

    # Anything other than a URL string means the page offered no usable link;
    # stop here instead of feeding a non-URL into the next lookup.
    unless following.is_a?(String)
      puts "Dead end for #{src} at node #{current}"
      break
    end

    if visited.include?(following)
      puts "Cycle detected for #{src} at node #{following}"
      break
    end

    visited.add(following)
    current = following
    count += 1
  end
end
31
+
32
# Command-line entry point: resolve the destination and every source term to a
# Wikipedia URL, then walk each source towards the destination, one thread per
# source.
options = { dst: 'Philosophy' }
OptionParser.new do |opts|
  opts.banner = "Usage: #{opts.program_name} --dst=<destination_article> <src1> <src2> ..."

  # Only record the raw term here, so the default and a user-supplied
  # destination are both resolved to a URL by the same code path below.
  opts.on('-d', '--dst DESTINATION', 'Destination article') do |dst|
    options[:dst] = dst
  end
end.parse!

# walk compares page URLs, so the destination term must be resolved first.
destination = Wikio.get_wiki_url(options[:dst])
abort "No Wikipedia article found for #{options[:dst]}" unless destination

puts "Searching for #{destination}"
threads = ARGV.map do |term|
  url = Wikio.get_wiki_url(term)
  if url
    Thread.new { walk(url, destination) }
  else
    # Skip terms the search could not resolve instead of walking from nil.
    $stderr.puts "No Wikipedia article found for #{term}"
    nil
  end
end
threads.compact.each(&:join)
@@ -0,0 +1,41 @@
1
+ require 'json'
2
+ require 'net/http'
3
+ require 'nokogiri'
4
+ require 'uri'
5
+
6
+ # Resources
7
+ # - https://stackoverflow.com/questions/27457977/searching-wikipedia-using-api
8
+
9
# Helpers for looking up Wikipedia articles and following links between them.
module Wikio
  WIKIPEDIA_API_URL = 'https://en.wikipedia.org/w/api.php'.freeze
  WIKIPEDIA_DOMAIN = 'https://en.wikipedia.org'.freeze

  # Look up +term+ with the Wikipedia opensearch API and return the URL of the
  # best-matching article.
  #
  # @param term [String] free-text search term
  # @return [String, nil] the first URL in the reply, or nil when the reply
  #   contains none
  def self.get_wiki_url(term)
    uri = URI(WIKIPEDIA_API_URL)
    uri.query = URI.encode_www_form(
      action: 'opensearch',
      search: term,
      limit: 1,
      namespace: 0,
      format: 'json'
    )
    # The URL is read from element 3 of the parsed reply; dig yields nil
    # rather than raising when the reply has no such element.
    JSON.parse(Net::HTTP.get(uri)).dig(3, 0)
  end

  # Fetch a Wikipedia page and return the absolute URL of the first link in
  # its body paragraphs (plain or italicised) that points under /wiki.
  #
  # @param wiki_url [String] URL of the page to fetch
  # @return [String, nil] absolute URL of the first matching link, or nil when
  #   the page has none
  def self.get_first_link(wiki_url)
    doc = Nokogiri::HTML(Net::HTTP.get(URI(wiki_url)))
    anchors = doc.xpath(
      "//div[contains(@class, 'mw-parser-output')]/p/a" \
      " | //div[contains(@class, 'mw-parser-output')]/p/i/a"
    )
    anchors.each do |node|
      # Anchors without an href are treated as non-matching.
      href = node.attr('href').to_s
      return "#{WIKIPEDIA_DOMAIN}#{href}" if href.start_with?('/wiki')
    end
    nil
  end
end
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wikio
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Daniel Cardoza
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-01-30 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Library containing helper functions for wikipedia
14
+ email: danielpcardoza@gmail.com
15
+ executables:
16
+ - wikireducer
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - bin/wikireducer
21
+ - lib/wikio.rb
22
+ homepage: http://rubygems.org/gems/wikio
23
+ licenses:
24
+ - MIT
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 2.6.11
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: Library for extracting information from wikipedia pages
46
+ test_files: []