wiki-yggdrasil 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -0
- data/README.md +1 -8
- data/lib/wiki/article.rb +11 -6
- data/lib/wiki/yggdrasil.rb +22 -17
- data/lib/wiki/yggdrasil/version.rb +1 -1
- data/wiki-yggdrasil.gemspec +1 -1
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bc95b7532698990234e748a8b3ba5753e6d9a6b129a14c292ca99c17f6fcdfd4
|
4
|
+
data.tar.gz: af3575d78d2f1cc313ab38bda507bc09227942fbaded2dd8127eab3316aeca6d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b93fab2fd30a9a1e2b778f82f0c2b3103cc0f4f15277d34bb36b245a5becdc71ff97172daaa275bd38c1d0f2ee8b295239053dae9885f1d3634d98daf4aa1041
|
7
|
+
data.tar.gz: f887259620bf814d809b4c301ba595eaace9c51bac4d6e8ee65242df973566df13f26e5ee5c0c85cfd2c14f0feed5efb76419211a5fe2f4d25fc3ea33ddec8e7
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.6.1
|
data/README.md
CHANGED
@@ -1,10 +1,6 @@
|
|
1
1
|
# Wiki::Yggdrasil
|
2
2
|
![Travis CI Build](https://travis-ci.org/alex0112/wiki-yggdrasil.svg?branch=master)
|
3
3
|
|
4
|
-
You. You're up late at night again reading up on some obscure mathematical topic. You find yourself with *so many* open tabs on Wikipedia. Wouldn't it be nice if you could just pick an article, and then view a tree of the articles it references?
|
5
|
-
|
6
|
-
Introducing Wiki::Yggdrasil. Named after the tree in Norse mythology that drinks from the well of all wisdom, Wiki::Yggdrasil is here to help you drink just as deeply from the well of wisdom that is Wikipedia.
|
7
|
-
|
8
4
|
Wiki::Yggdrasil takes a Wikipedia URI as an argument, and proceeds to spider out a dependency tree of referenced articles.
|
9
5
|
|
10
6
|
## Usage
|
@@ -38,10 +34,7 @@ The preceding code produces a structure as follows:
|
|
38
34
|
{:name=>"Ragnarök", :children=>[]}
|
39
35
|
```
|
40
36
|
|
41
|
-
|
42
|
-
|
43
|
-
### This is taking a long time. Is that normal?
|
44
|
-
Yes. This is normal. Any Yggdrasil object created with a depth of three or higher will likely take a few minutes to scrape the necessary information.
|
37
|
+
_Note: Any Yggdrasil object created with a depth of three or higher will likely take a few minutes to scrape the necessary information._
|
45
38
|
|
46
39
|
## Installation
|
47
40
|
|
data/lib/wiki/article.rb
CHANGED
@@ -18,14 +18,15 @@ module Wiki::Yggdrasil
|
|
18
18
|
@summary ||= Nokogiri::HTML(Nokogiri::HTML(open(self.uri)).to_s.split('<div id="toc" class="toc">')[0]).css('p') ## TODO: Cleanup
|
19
19
|
end
|
20
20
|
|
21
|
-
def child_links
|
21
|
+
def child_links(help: false)
|
22
22
|
formatted_links = format_links
|
23
|
-
validated_links =
|
23
|
+
validated_links = formatted_links.select { |uri| Wiki::Yggdrasil::Article.is_valid_wiki_article?(uri: uri) }
|
24
|
+
|
24
25
|
@child_links ||= validated_links
|
25
26
|
end
|
26
27
|
|
27
|
-
def
|
28
|
-
self.summary.css('p a')
|
28
|
+
def scrape_links(help_links: false) ## TODO test help_links param in spec
|
29
|
+
help_links ? self.summary.css('p a') : self.summary.css('p a[href!="/wiki/Help:IPA/English"]')
|
29
30
|
end
|
30
31
|
|
31
32
|
def name
|
@@ -33,16 +34,20 @@ module Wiki::Yggdrasil
|
|
33
34
|
## TODO: Cleanup
|
34
35
|
end
|
35
36
|
|
36
|
-
def format_links(anchors: self.
|
37
|
+
def format_links(anchors: self.scrape_links)
|
37
38
|
uris = anchors.map do |anchor|
|
38
39
|
anchor.nil? || anchor['href'].nil? ? next : 'https://en.wikipedia.org' << anchor['href'] ## nil href attributes are often self refs (but possibly not always). Ignore them.
|
40
|
+
## TODO: take care of this in .scrape_links with a css selector (like the Help:IPA links)
|
39
41
|
end
|
40
42
|
|
41
43
|
uris.compact
|
42
44
|
end
|
43
45
|
|
46
|
+
def self.remove_italic_tags(uri_list)
|
47
|
+
|
48
|
+
end
|
49
|
+
|
44
50
|
def self.is_valid_wiki_article?(uri:)
|
45
|
-
## Is this URI a wikipedia article?
|
46
51
|
uri =~ /.*wikipedia\.org\/wiki\/.+/ ? true : false
|
47
52
|
end
|
48
53
|
|
data/lib/wiki/yggdrasil.rb
CHANGED
@@ -13,26 +13,31 @@ module Wiki
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def children(depth: 4, article_children: self.root.child_links)
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
16
|
+
@children ||= { name: self.root.name, children: recursive_scrape(depth: depth), index: 0, depth: 0 }
|
17
|
+
end
|
18
|
+
|
19
|
+
def recursive_scrape(depth: 1, children: @root.child_links)
|
20
|
+
children.each_with_index.map do |uri, index|
|
21
|
+
article = Wiki::Yggdrasil::Article.new(uri: uri)
|
22
|
+
if (depth == 1)
|
23
|
+
{
|
24
|
+
name: article.name,
|
25
|
+
index: index + 1,
|
26
|
+
level: depth,
|
27
|
+
children: [],
|
28
|
+
}
|
29
|
+
else
|
30
|
+
{
|
31
|
+
name: article.name,
|
32
|
+
index: index + 1,
|
33
|
+
level: depth,
|
34
|
+
children: recursive_scrape(depth - 1, article.child_links),
|
35
|
+
}
|
30
36
|
end
|
31
37
|
end
|
32
|
-
|
33
|
-
@children ||= { name: self.root.name, children: get_children.call(depth, article_children) }
|
34
38
|
end
|
35
|
-
|
36
39
|
end
|
40
|
+
|
37
41
|
end
|
42
|
+
|
38
43
|
end
|
data/wiki-yggdrasil.gemspec
CHANGED
@@ -32,7 +32,7 @@ Gem::Specification.new do |spec|
|
|
32
32
|
|
33
33
|
spec.add_runtime_dependency "nokogiri", "~> 1.8.2"
|
34
34
|
|
35
|
-
spec.add_development_dependency "bundler", "
|
35
|
+
spec.add_development_dependency "bundler", "2.0.1"
|
36
36
|
spec.add_development_dependency "rake", "~> 10.0"
|
37
37
|
spec.add_development_dependency "rspec", "~> 3.0"
|
38
38
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wiki-yggdrasil
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- alex0112
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -28,16 +28,16 @@ dependencies:
|
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - '='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 2.0.1
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - '='
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 2.0.1
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rake
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -76,6 +76,7 @@ extra_rdoc_files: []
|
|
76
76
|
files:
|
77
77
|
- ".gitignore"
|
78
78
|
- ".rspec"
|
79
|
+
- ".ruby-version"
|
79
80
|
- ".travis.yml"
|
80
81
|
- Gemfile
|
81
82
|
- LICENSE.txt
|
@@ -107,8 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
107
108
|
- !ruby/object:Gem::Version
|
108
109
|
version: '0'
|
109
110
|
requirements: []
|
110
|
-
|
111
|
-
rubygems_version: 2.7.4
|
111
|
+
rubygems_version: 3.0.3
|
112
112
|
signing_key:
|
113
113
|
specification_version: 4
|
114
114
|
summary: Scrape Wikipedia articles and generate a json tree
|