wiki-yggdrasil 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -0
- data/README.md +1 -8
- data/lib/wiki/article.rb +11 -6
- data/lib/wiki/yggdrasil.rb +22 -17
- data/lib/wiki/yggdrasil/version.rb +1 -1
- data/wiki-yggdrasil.gemspec +1 -1
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bc95b7532698990234e748a8b3ba5753e6d9a6b129a14c292ca99c17f6fcdfd4
|
4
|
+
data.tar.gz: af3575d78d2f1cc313ab38bda507bc09227942fbaded2dd8127eab3316aeca6d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b93fab2fd30a9a1e2b778f82f0c2b3103cc0f4f15277d34bb36b245a5becdc71ff97172daaa275bd38c1d0f2ee8b295239053dae9885f1d3634d98daf4aa1041
|
7
|
+
data.tar.gz: f887259620bf814d809b4c301ba595eaace9c51bac4d6e8ee65242df973566df13f26e5ee5c0c85cfd2c14f0feed5efb76419211a5fe2f4d25fc3ea33ddec8e7
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.6.1
|
data/README.md
CHANGED
@@ -1,10 +1,6 @@
|
|
1
1
|
# Wiki::Yggdrasil
|
2
2
|

|
3
3
|
|
4
|
-
You. You're up late at night again reading up on some obscure mathematical topic. You find yourself with *so many* open tabs on Wikipedia. Wouldn't it be nice if you could just pick an article, and then view a tree of the articles it references?
|
5
|
-
|
6
|
-
Introducing Wiki::Yggdrasil. Named after the tree in Norse mythology that drinks from the well of all wisdom, Wiki::Yggdrasil is here to help you drink just as deeply from the well of wisdom that is Wikipedia.
|
7
|
-
|
8
4
|
Wiki::Yggdrasil takes a Wikipedia URI as an argument, and proceeds to spider out a dependency tree of referenced articles.
|
9
5
|
|
10
6
|
## Usage
|
@@ -38,10 +34,7 @@ The preceding code produces a structure as follows:
|
|
38
34
|
{:name=>"Ragnarök", :children=>[]}
|
39
35
|
```
|
40
36
|
|
41
|
-
|
42
|
-
|
43
|
-
### This is taking a long time. Is that normal?
|
44
|
-
Yes. This is normal. Any Yggdrasil object created with a depth of three or higher will likely take a few minutes to scrape the necessary information.
|
37
|
+
_Note: Any Yggdrasil object created with a depth of three or higher will likely take a few minutes to scrape the necessary information._
|
45
38
|
|
46
39
|
## Installation
|
47
40
|
|
data/lib/wiki/article.rb
CHANGED
@@ -18,14 +18,15 @@ module Wiki::Yggdrasil
|
|
18
18
|
@summary ||= Nokogiri::HTML(Nokogiri::HTML(open(self.uri)).to_s.split('<div id="toc" class="toc">')[0]).css('p') ## TODO: Cleanup
|
19
19
|
end
|
20
20
|
|
21
|
-
def child_links
|
21
|
+
def child_links(help: false)
|
22
22
|
formatted_links = format_links
|
23
|
-
validated_links =
|
23
|
+
validated_links = formatted_links.select { |uri| Wiki::Yggdrasil::Article.is_valid_wiki_article?(uri: uri) }
|
24
|
+
|
24
25
|
@child_links ||= validated_links
|
25
26
|
end
|
26
27
|
|
27
|
-
def
|
28
|
-
self.summary.css('p a')
|
28
|
+
def scrape_links(help_links: false) ## TODO test help_links param in spec
|
29
|
+
help_links ? self.summary.css('p a') : self.summary.css('p a[href!="/wiki/Help:IPA/English"]')
|
29
30
|
end
|
30
31
|
|
31
32
|
def name
|
@@ -33,16 +34,20 @@ module Wiki::Yggdrasil
|
|
33
34
|
## TODO: Cleanup
|
34
35
|
end
|
35
36
|
|
36
|
-
def format_links(anchors: self.
|
37
|
+
def format_links(anchors: self.scrape_links)
|
37
38
|
uris = anchors.map do |anchor|
|
38
39
|
anchor.nil? || anchor['href'].nil? ? next : 'https://en.wikipedia.org' << anchor['href'] ## nil href attributes are often self refs (but possibly not always). Ignore them.
|
40
|
+
## TODO: take care of this in .scrape_links with a css selector (like the Help:IPA links)
|
39
41
|
end
|
40
42
|
|
41
43
|
uris.compact
|
42
44
|
end
|
43
45
|
|
46
|
+
def self.remove_italic_tags(uri_list)
|
47
|
+
|
48
|
+
end
|
49
|
+
|
44
50
|
def self.is_valid_wiki_article?(uri:)
|
45
|
-
## Is this URI a wikipedia article?
|
46
51
|
uri =~ /.*wikipedia\.org\/wiki\/.+/ ? true : false
|
47
52
|
end
|
48
53
|
|
data/lib/wiki/yggdrasil.rb
CHANGED
@@ -13,26 +13,31 @@ module Wiki
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def children(depth: 4, article_children: self.root.child_links)
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
16
|
+
@children ||= { name: self.root.name, children: recursive_scrape(depth: depth), index: 0, depth: 0 }
|
17
|
+
end
|
18
|
+
|
19
|
+
def recursive_scrape(depth: 1, children: @root.child_links)
|
20
|
+
children.each_with_index.map do |uri, index|
|
21
|
+
article = Wiki::Yggdrasil::Article.new(uri: uri)
|
22
|
+
if (depth == 1)
|
23
|
+
{
|
24
|
+
name: article.name,
|
25
|
+
index: index + 1,
|
26
|
+
level: depth,
|
27
|
+
children: [],
|
28
|
+
}
|
29
|
+
else
|
30
|
+
{
|
31
|
+
name: article.name,
|
32
|
+
index: index + 1,
|
33
|
+
level: depth,
|
34
|
+
children: recursive_scrape(depth - 1, article.child_links),
|
35
|
+
}
|
30
36
|
end
|
31
37
|
end
|
32
|
-
|
33
|
-
@children ||= { name: self.root.name, children: get_children.call(depth, article_children) }
|
34
38
|
end
|
35
|
-
|
36
39
|
end
|
40
|
+
|
37
41
|
end
|
42
|
+
|
38
43
|
end
|
data/wiki-yggdrasil.gemspec
CHANGED
@@ -32,7 +32,7 @@ Gem::Specification.new do |spec|
|
|
32
32
|
|
33
33
|
spec.add_runtime_dependency "nokogiri", "~> 1.8.2"
|
34
34
|
|
35
|
-
spec.add_development_dependency "bundler", "
|
35
|
+
spec.add_development_dependency "bundler", "2.0.1"
|
36
36
|
spec.add_development_dependency "rake", "~> 10.0"
|
37
37
|
spec.add_development_dependency "rspec", "~> 3.0"
|
38
38
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wiki-yggdrasil
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- alex0112
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -28,16 +28,16 @@ dependencies:
|
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - '='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 2.0.1
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - '='
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 2.0.1
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rake
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -76,6 +76,7 @@ extra_rdoc_files: []
|
|
76
76
|
files:
|
77
77
|
- ".gitignore"
|
78
78
|
- ".rspec"
|
79
|
+
- ".ruby-version"
|
79
80
|
- ".travis.yml"
|
80
81
|
- Gemfile
|
81
82
|
- LICENSE.txt
|
@@ -107,8 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
107
108
|
- !ruby/object:Gem::Version
|
108
109
|
version: '0'
|
109
110
|
requirements: []
|
110
|
-
|
111
|
-
rubygems_version: 2.7.4
|
111
|
+
rubygems_version: 3.0.3
|
112
112
|
signing_key:
|
113
113
|
specification_version: 4
|
114
114
|
summary: Scrape Wikipedia articles and generate a json tree
|