whatsa 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/environment.rb +1 -0
- data/lib/whatsa/article.rb +7 -1
- data/lib/whatsa/cli.rb +5 -16
- data/lib/whatsa/format.rb +46 -0
- data/lib/whatsa/scraper.rb +6 -3
- data/lib/whatsa/section.rb +8 -2
- data/lib/whatsa/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b7cf846d19a9dba5468c21b861abeac0f2eddb90
|
4
|
+
data.tar.gz: ba374012eb9a91e66db755986d55716b4d8bf2b4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc3ddde58c65c630c7edf1e0bdfba09e1ad3072d2f81bb212f3c04d0dcecf85d49905a0565d515c03d135de82dfec7270bd816f9a17ae1d6aba782032e2e2bb5
|
7
|
+
data.tar.gz: 3ea318931845cf4e3787c1428cc991a253326080deba40d2b03c869dec56c62131930f12d4426d76b2613ad5e8dffb95119d964bbb73cb19abe2327f56461c48
|
data/config/environment.rb
CHANGED
data/lib/whatsa/article.rb
CHANGED
@@ -1,4 +1,10 @@
|
|
1
1
|
class Whatsa::Article
|
2
|
+
# I think this is a bad use of include. I feel like I _should_ make Format a
|
3
|
+
# class and use its methods that way, but I'm going to be using them so often
|
4
|
+
# that I would prefer they not be referenced as Whatsa::Format.blah_blah every
|
5
|
+
# time...
|
6
|
+
include Whatsa::Format
|
7
|
+
|
2
8
|
attr_accessor :sections
|
3
9
|
attr_reader :contents, :title
|
4
10
|
|
@@ -53,7 +59,7 @@ class Whatsa::Article
|
|
53
59
|
indices << -1
|
54
60
|
secs = [Whatsa::Section.new("#{self.title} - Introduction", intro_pars)]
|
55
61
|
indices.each_cons(2) do |i, j|
|
56
|
-
title = self.contents[i].text
|
62
|
+
title = heading_to_title(self.contents[i].text)
|
57
63
|
par_nodes = self.contents[i...j].select do |e|
|
58
64
|
e.name == 'p' || e.name == 'ul' && e.text != ""
|
59
65
|
end
|
data/lib/whatsa/cli.rb
CHANGED
@@ -1,4 +1,9 @@
|
|
1
1
|
class Whatsa::CLI
|
2
|
+
# I think this is a bad use of include. I feel like I _should_ make Format a
|
3
|
+
# class and use its methods that way, but I'm going to be using them so often
|
4
|
+
# that I would prefer they not be referenced as Whatsa::Format.blah_blah every
|
5
|
+
# time...
|
6
|
+
include Whatsa::Format
|
2
7
|
|
3
8
|
def clear_screen
|
4
9
|
50.times { puts "\n" }
|
@@ -44,22 +49,6 @@ class Whatsa::CLI
|
|
44
49
|
input
|
45
50
|
end
|
46
51
|
|
47
|
-
# setting an indent will indent the lines AFTER the first line of a paragraph
|
48
|
-
def word_wrap(text, indent=0)
|
49
|
-
count = 0
|
50
|
-
words = text.split(/ /)
|
51
|
-
words.each_with_index do |word, index|
|
52
|
-
count += word.length + 1
|
53
|
-
if count > 80
|
54
|
-
words.insert(index, "\n#{' ' * indent}")
|
55
|
-
count = indent
|
56
|
-
elsif word.index("\n")
|
57
|
-
count = word.length
|
58
|
-
end
|
59
|
-
end
|
60
|
-
words.join(" ").gsub(/^ /, "")
|
61
|
-
end
|
62
|
-
|
63
52
|
def display_dmb(dmb)
|
64
53
|
raise TypeError unless dmb.is_a?(Whatsa::Disambig)
|
65
54
|
clear_screen
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Whatsa
  # Text-formatting helpers meant to be mixed into other classes with
  # `include Whatsa::Format`. Every method takes a String and returns a new
  # String; none of them read or write instance state.
  module Format

    # Width at which word_wrap starts looking for a place to break a line.
    WRAP_WIDTH = 80

    # Turns a free-form search term into a '+'-separated query fragment.
    #
    # Only word characters (A-Z, a-z, 0-9, '_') and parentheses are kept.
    # Every run of other characters collapses to a single '+', and a '+' at
    # the very start or end of the result is dropped.
    #
    # The character class is spelled with \w on purpose: the range `A-z` also
    # covers the ASCII punctuation between 'Z' and 'a' ('[', '\', ']', '^',
    # '`'), which would leave those characters in the query.
    #
    # @param string [String] raw search term
    # @return [String] query-safe term, e.g. "foo [bar]" => "foo+bar"
    def url_friendly(string)
      string.gsub(/[^\w()]+/, '+').gsub(/(\A\+|\+\z)/, '')
    end

    # Removes every literal "[edit]" marker from a heading and trims the
    # surrounding whitespace.
    #
    # @param string [String] heading text
    # @return [String] heading without "[edit]" markers
    def heading_to_title(string)
      string.gsub('[edit]', '').strip
    end

    # Strips bracketed citation markers: numeric ones such as "[12]" and the
    # literal "[citation needed]".
    #
    # @param string [String] paragraph text
    # @return [String] text without those markers
    def remove_citation_markers(string)
      string.gsub(/\[(\d+|citation needed)\]/, "")
    end

    # Formats multi-line text as a bullet list: each line gets a "- " prefix
    # and the result is word-wrapped with continuation lines indented to sit
    # under the bullet text. Text without a newline is returned untouched.
    #
    # @param string [String] text, possibly containing several lines
    # @return [String] bulleted and wrapped text, or the original single line
    def bulletize_lines(string)
      if string.index("\n")
        list = string.gsub(/^/, "- ")
        word_wrap(list, "- ".length)
      else
        string
      end
    end

    # Wraps text by replacing selected spaces with a newline followed by
    # `indent` spaces, so the indent applies to the lines AFTER the first
    # line of a paragraph. Text shorter than WRAP_WIDTH characters is
    # returned without any wrapping.
    #
    # Only spaces are ever replaced; no other character is removed. A word
    # too long to fit is left intact (the line overflows) and wrapping
    # resumes at the next space.
    #
    # @param text [String] text to wrap; may already contain newlines
    # @param indent [Integer] number of spaces put after each inserted newline
    # @return [String] the wrapped text
    def word_wrap(text, indent=0)
      chars = text.chars
      unless text.length < WRAP_WIDTH
        count = 1
        # Index of the most recent space on the CURRENT output line, or nil
        # when that line has no usable break point yet.
        last_space = nil
        chars.each_with_index do |char, index|
          count += 1
          last_space = index if char == " "
          if char == "\n"
            # An existing line break starts a fresh line, so spaces seen
            # before it must no longer be candidates for wrapping.
            count = indent
            last_space = nil
          elsif count >= WRAP_WIDTH && last_space
            # `>=` rather than `==`: after an over-long word the counter is
            # already past the limit and must still trigger a break at the
            # next space.
            chars[last_space] = "\n#{' ' * indent}"
            count = indent + index - last_space
            last_space = nil
          end
        end
      end
      chars.join
    end

  end
end
|
data/lib/whatsa/scraper.rb
CHANGED
@@ -1,4 +1,9 @@
|
|
1
1
|
class Whatsa::Scraper
|
2
|
+
# I think this is a bad use of include. I feel like I _should_ make Format a
|
3
|
+
# class and use its methods that way, but I'm going to be using them so often
|
4
|
+
# that I would prefer they not be referenced as Whatsa::Format.blah_blah every
|
5
|
+
# time...
|
6
|
+
include Whatsa::Format
|
2
7
|
|
3
8
|
WIKISEARCH = 'https://en.wikipedia.org/w/index.php?search='
|
4
9
|
|
@@ -7,8 +12,7 @@ class Whatsa::Scraper
|
|
7
12
|
def initialize(term)
|
8
13
|
# only keep word chars and parens, turn everything between each 'word'
|
9
14
|
# to a single '+' and remove '+'s at the beginning and end if they're there
|
10
|
-
|
11
|
-
@query = term.gsub(/[^A-z0-9\(\)]+/, '+').gsub(/(\A\+|\+\z)/, '')
|
15
|
+
@query = url_friendly(term)
|
12
16
|
|
13
17
|
# store the page in an instance variable so we don't keep polling the site
|
14
18
|
@page = Nokogiri::HTML(open(WIKISEARCH + self.query))
|
@@ -46,5 +50,4 @@ class Whatsa::Scraper
|
|
46
50
|
def make_disambig
|
47
51
|
disambig? ? Whatsa::Disambig.new(self.page) : nil
|
48
52
|
end
|
49
|
-
|
50
53
|
end
|
data/lib/whatsa/section.rb
CHANGED
@@ -1,4 +1,10 @@
|
|
1
1
|
class Whatsa::Section
|
2
|
+
# I think this is a bad use of include. I feel like I _should_ make Format a
|
3
|
+
# class and use its methods that way, but I'm going to be using them so often
|
4
|
+
# that I would prefer they not be referenced as Whatsa::Format.blah_blah every
|
5
|
+
# time...
|
6
|
+
include Whatsa::Format
|
7
|
+
|
2
8
|
attr_accessor :title, :paragraphs, :article
|
3
9
|
|
4
10
|
def initialize(title, paragraphs)
|
@@ -19,11 +25,11 @@ class Whatsa::Section
|
|
19
25
|
private
|
20
26
|
|
21
27
|
def remove_citations
|
22
|
-
self.paragraphs.map! { |par| par
|
28
|
+
self.paragraphs.map! { |par| remove_citation_markers(par) }
|
23
29
|
end
|
24
30
|
|
25
31
|
def bullet_list_pars
|
26
|
-
self.paragraphs.map! { |par|
|
32
|
+
self.paragraphs.map! { |par| bulletize_lines(par) }
|
27
33
|
end
|
28
34
|
|
29
35
|
end
|
data/lib/whatsa/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: whatsa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Keegan Leitz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-12-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -90,6 +90,7 @@ files:
|
|
90
90
|
- lib/whatsa/article.rb
|
91
91
|
- lib/whatsa/cli.rb
|
92
92
|
- lib/whatsa/disambig.rb
|
93
|
+
- lib/whatsa/format.rb
|
93
94
|
- lib/whatsa/scraper.rb
|
94
95
|
- lib/whatsa/section.rb
|
95
96
|
- lib/whatsa/version.rb
|