wikio 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/wikireducer +48 -0
- data/lib/wikio.rb +41 -0
- metadata +46 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 346b20f9174cb1d24c10850f5688fd6d916ddcbf
|
4
|
+
data.tar.gz: bdbc0f5e08ff93d15e570237ce9ded592b76b067
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 39fdd792e2f4c8f9b9dd3242d4ae896606fe8203e9eaaed8c8caea6172f073f91233d44d988c3219d89c813eed26a28b44a7d0a0df9ed9f8418590a06dcd43d6
|
7
|
+
data.tar.gz: 47cbea5777dec4da685ffd82462fbb72b75ab76967cd0ef3a257fc5d168c7e5c7091fd10eb408dffcf033c8c9407a93f2dc195b54fd8d834fb9fd4b49ada198c
|
data/bin/wikireducer
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'set'
|
5
|
+
|
6
|
+
require 'wikio'
|
7
|
+
|
8
|
+
# Walk the chain of first links between Wikipedia pages, starting at +src+.
#
# The walk stops when +dst+ is reached, when a page that was already visited
# comes up again, or when the current page yields no further link. Each step
# is logged to standard error; the final outcome is printed to standard output.
#
# @param src [String] Wikipedia URL to start from
# @param dst [String] Wikipedia URL to stop at
# @return [nil]
def walk(src, dst)
  visited = Set.new([src])
  current = src
  count = 0

  loop do
    if current == dst
      puts "#{src} -> #{dst} in #{count} steps"
      break
    end

    $stderr.puts "#{src} -> #{current}"
    next_url = Wikio.get_first_link(current)

    # Anything other than a URL string means the page produced no usable
    # link; feeding it back into get_first_link would raise, so stop here.
    unless next_url.is_a?(String)
      puts "Dead end for #{src} at node #{current}"
      break
    end

    if visited.include?(next_url)
      puts "Cycle detected for #{src} at node #{next_url}"
      break
    end

    visited.add(next_url)
    current = next_url
    count += 1
  end
end
|
31
|
+
|
32
|
+
# Command-line entry point: resolve the destination and every source term to
# a Wikipedia URL, then walk each source in its own thread.
options = { dst: 'Philosophy' }
parser = OptionParser.new do |opts|
  opts.banner = 'Usage: wikireducer [--dst=<destination_article>] <src1> <src2> ...'

  # A short switch is a single character; '-dst' would be read as '-d' with
  # the argument 'st'.
  opts.on('-d', '--dst DESTINATION', 'Destination article') do |dst|
    options[:dst] = dst
  end
end
parser.parse!

# Resolve the destination term (the default included) to a URL, because walk
# compares it against the article URLs returned by Wikio.get_first_link.
destination = Wikio.get_wiki_url(options[:dst])
abort "No Wikipedia article found for destination '#{options[:dst]}'" unless destination

puts "Searching for #{destination}"

# Terms without a matching article are reported and skipped rather than
# handed to walk as nil.
source_urls = ARGV.map do |term|
  url = Wikio.get_wiki_url(term)
  $stderr.puts "No Wikipedia article found for '#{term}'" unless url
  url
end.compact

source_urls.map { |url| Thread.new { walk(url, destination) } }.each(&:join)
|
data/lib/wikio.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'net/http'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'uri'
|
5
|
+
|
6
|
+
# Resources
|
7
|
+
# - https://stackoverflow.com/questions/27457977/searching-wikipedia-using-api
|
8
|
+
|
9
|
+
# Helper functions for looking up English Wikipedia articles and following
# the links inside them.
module Wikio
  WIKIPEDIA_API_URL = 'https://en.wikipedia.org/w/api.php'.freeze
  WIKIPEDIA_DOMAIN = 'https://en.wikipedia.org'.freeze

  # Look up the best-matching article for a search term through the
  # Wikipedia "opensearch" API action (limit 1, namespace 0).
  #
  # @param term [String] free-text search term
  # @return [String, nil] the first entry of the fourth element of the JSON
  #   response (the article URL), or nil when the response holds no match
  def self.get_wiki_url(term)
    uri = URI(WIKIPEDIA_API_URL)
    uri.query = URI.encode_www_form(
      action: 'opensearch',
      search: term,
      limit: 1,
      namespace: 0,
      format: 'json'
    )
    response = JSON.parse(Net::HTTP.get(uri))

    # Array() tolerates a missing fourth element (e.g. a JSON object returned
    # instead of the usual array) and yields nil rather than raising.
    Array(response[3]).first
  end

  # Fetch an article and return the absolute URL of its first in-paragraph
  # link that points at another wiki page.
  #
  # Candidate anchors are those directly inside a <p> (optionally wrapped in
  # <i>) of a div whose class contains 'mw-parser-output'.
  #
  # @param wiki_url [String] URL of the article to fetch
  # @return [String, nil] absolute URL of the first link whose href starts
  #   with '/wiki/', or nil when the page has no such link
  def self.get_first_link(wiki_url)
    doc = Nokogiri::HTML(Net::HTTP.get(URI(wiki_url)))
    anchors = doc.xpath(
      "//div[contains(@class, 'mw-parser-output')]/p/a" \
      " | //div[contains(@class, 'mw-parser-output')]/p/i/a"
    )

    anchors.each do |node|
      # to_s guards against anchors that carry no href attribute.
      href = node.attr('href').to_s
      return WIKIPEDIA_DOMAIN + href if href.start_with?('/wiki/')
    end

    # Explicit nil: otherwise the method would return the node set itself.
    nil
  end
end
|
metadata
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wikio
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Daniel Cardoza
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-01-30 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Library containing helper functions for wikipedia
|
14
|
+
email: danielpcardoza@gmail.com
|
15
|
+
executables:
|
16
|
+
- wikireducer
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- bin/wikireducer
|
21
|
+
- lib/wikio.rb
|
22
|
+
homepage: http://rubygems.org/gems/wikio
|
23
|
+
licenses:
|
24
|
+
- MIT
|
25
|
+
metadata: {}
|
26
|
+
post_install_message:
|
27
|
+
rdoc_options: []
|
28
|
+
require_paths:
|
29
|
+
- lib
|
30
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
requirements: []
|
41
|
+
rubyforge_project:
|
42
|
+
rubygems_version: 2.6.11
|
43
|
+
signing_key:
|
44
|
+
specification_version: 4
|
45
|
+
summary: Library for extracting information from wikipedia pages
|
46
|
+
test_files: []
|