whatsa 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/environment.rb +1 -0
- data/lib/whatsa/article.rb +7 -1
- data/lib/whatsa/cli.rb +5 -16
- data/lib/whatsa/format.rb +46 -0
- data/lib/whatsa/scraper.rb +6 -3
- data/lib/whatsa/section.rb +8 -2
- data/lib/whatsa/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b7cf846d19a9dba5468c21b861abeac0f2eddb90
|
4
|
+
data.tar.gz: ba374012eb9a91e66db755986d55716b4d8bf2b4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc3ddde58c65c630c7edf1e0bdfba09e1ad3072d2f81bb212f3c04d0dcecf85d49905a0565d515c03d135de82dfec7270bd816f9a17ae1d6aba782032e2e2bb5
|
7
|
+
data.tar.gz: 3ea318931845cf4e3787c1428cc991a253326080deba40d2b03c869dec56c62131930f12d4426d76b2613ad5e8dffb95119d964bbb73cb19abe2327f56461c48
|
data/config/environment.rb
CHANGED
data/lib/whatsa/article.rb
CHANGED
@@ -1,4 +1,10 @@
|
|
1
1
|
class Whatsa::Article
|
2
|
+
# I think this is a bad use of include. I feel like I _should_ make Format a
|
3
|
+
# class and use its methods that way, but I'm going to be using them so often
|
4
|
+
# that I would prefer they not be referenced as Whatsa::Format.blah_blah every
|
5
|
+
# time...
|
6
|
+
include Whatsa::Format
|
7
|
+
|
2
8
|
attr_accessor :sections
|
3
9
|
attr_reader :contents, :title
|
4
10
|
|
@@ -53,7 +59,7 @@ class Whatsa::Article
|
|
53
59
|
indices << -1
|
54
60
|
secs = [Whatsa::Section.new("#{self.title} - Introduction", intro_pars)]
|
55
61
|
indices.each_cons(2) do |i, j|
|
56
|
-
title = self.contents[i].text
|
62
|
+
title = heading_to_title(self.contents[i].text)
|
57
63
|
par_nodes = self.contents[i...j].select do |e|
|
58
64
|
e.name == 'p' || e.name == 'ul' && e.text != ""
|
59
65
|
end
|
data/lib/whatsa/cli.rb
CHANGED
@@ -1,4 +1,9 @@
|
|
1
1
|
class Whatsa::CLI
|
2
|
+
# I think this is a bad use of include. I feel like I _should_ make Format a
|
3
|
+
# class and use its methods that way, but I'm going to be using them so often
|
4
|
+
# that I would prefer they not be referenced as Whatsa::Format.blah_blah every
|
5
|
+
# time...
|
6
|
+
include Whatsa::Format
|
2
7
|
|
3
8
|
def clear_screen
|
4
9
|
50.times { puts "\n" }
|
@@ -44,22 +49,6 @@ class Whatsa::CLI
|
|
44
49
|
input
|
45
50
|
end
|
46
51
|
|
47
|
-
# setting an indent will indent the lines AFTER the first line of a paragraph
|
48
|
-
def word_wrap(text, indent=0)
|
49
|
-
count = 0
|
50
|
-
words = text.split(/ /)
|
51
|
-
words.each_with_index do |word, index|
|
52
|
-
count += word.length + 1
|
53
|
-
if count > 80
|
54
|
-
words.insert(index, "\n#{' ' * indent}")
|
55
|
-
count = indent
|
56
|
-
elsif word.index("\n")
|
57
|
-
count = word.length
|
58
|
-
end
|
59
|
-
end
|
60
|
-
words.join(" ").gsub(/^ /, "")
|
61
|
-
end
|
62
|
-
|
63
52
|
def display_dmb(dmb)
|
64
53
|
raise TypeError unless dmb.is_a?(Whatsa::Disambig)
|
65
54
|
clear_screen
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Whatsa
  # Stateless text-formatting helpers, meant to be mixed into other classes
  # with `include Whatsa::Format`.
  module Format

    # Turn free text into a '+'-separated query-string fragment.
    #
    # Only ASCII word characters (letters, digits, underscore) and parentheses
    # are kept; every run of anything else collapses into a single '+', and
    # '+'s at the very beginning or end are removed.
    #
    # NOTE: the character class is deliberately `\w` and not `A-z`. The range
    # `A-z` also spans the ASCII punctuation between 'Z' and 'a'
    # ('[', '\', ']', '^', '`'), which would otherwise leak into the query.
    #
    # @param string [String] raw search term
    # @return [String] the sanitized term
    def url_friendly(string)
      string.gsub(/[^\w()]+/, '+').gsub(/\A\+|\+\z/, '')
    end

    # Remove every literal "[edit]" marker from a heading and trim the
    # surrounding whitespace.
    #
    # @param string [String] heading text
    # @return [String] the cleaned-up title
    def heading_to_title(string)
      string.gsub('[edit]', '').strip
    end

    # Remove numeric citation markers ("[12]") and "[citation needed]" tags.
    #
    # @param string [String] paragraph text
    # @return [String] the text without those markers
    def remove_citation_markers(string)
      string.gsub(/\[(\d+|citation needed)\]/, '')
    end

    # Prefix every line of a multi-line string with "- " and word-wrap the
    # result so that wrapped lines align under the text of their bullet.
    # Strings without a newline are returned as they are.
    #
    # @param string [String] text, possibly containing several lines
    # @return [String] the bulleted text, or the original single-line string
    def bulletize_lines(string)
      return string unless string.include?("\n")

      bullet = '- '
      word_wrap(string.gsub(/^/, bullet), bullet.length)
    end

    # Wrap text by replacing spaces with line breaks so lines stay shorter
    # than +width+ characters.
    #
    # Setting an indent will indent the lines AFTER the first line of a
    # paragraph. Text shorter than +width+ is returned unchanged. A word that
    # is too long to fit is never split or altered: it is left on its own
    # overlong line and wrapping resumes at the next space.
    #
    # @param text [String] the text to wrap
    # @param indent [Integer] number of spaces put in front of wrapped lines
    # @param width [Integer] column limit (defaults to 80)
    # @return [String] a new string holding the wrapped text
    def word_wrap(text, indent = 0, width: 80)
      return text.dup if text.length < width

      continuation = "\n#{' ' * indent}"
      chars = text.chars
      # The counter starts one ahead on the first line and at +indent+ after an
      # existing newline; this keeps the output identical to earlier releases
      # for ordinary text.
      column = 1
      # Index of the most recent space on the CURRENT line, or nil if the line
      # has none yet. It is cleared on every line break so a stale index from a
      # previous line can never be reused.
      break_at = nil

      chars.each_with_index do |char, index|
        column += 1
        if char == "\n"
          column = indent
          break_at = nil
        elsif char == ' '
          break_at = index
        end
        # Nothing to do while the line still fits, or while there is no space
        # to break at (an overlong word): wait for the next space instead.
        next if break_at.nil? || column < width

        chars[break_at] = continuation
        column = indent + index - break_at
        break_at = nil
      end

      chars.join
    end

  end
end
|
data/lib/whatsa/scraper.rb
CHANGED
@@ -1,4 +1,9 @@
|
|
1
1
|
class Whatsa::Scraper
|
2
|
+
# I think this is a bad use of include. I feel like I _should_ make Format a
|
3
|
+
# class and use its methods that way, but I'm going to be using them so often
|
4
|
+
# that I would prefer they not be referenced as Whatsa::Format.blah_blah every
|
5
|
+
# time...
|
6
|
+
include Whatsa::Format
|
2
7
|
|
3
8
|
WIKISEARCH = 'https://en.wikipedia.org/w/index.php?search='
|
4
9
|
|
@@ -7,8 +12,7 @@ class Whatsa::Scraper
|
|
7
12
|
def initialize(term)
|
8
13
|
# only keep word chars and parens, turn everything between each 'word'
|
9
14
|
# to a single '+' and remove '+'s at the beginning and end if they're there
|
10
|
-
|
11
|
-
@query = term.gsub(/[^A-z0-9\(\)]+/, '+').gsub(/(\A\+|\+\z)/, '')
|
15
|
+
@query = url_friendly(term)
|
12
16
|
|
13
17
|
# store the page in an instance variable so we don't keep polling the site
|
14
18
|
@page = Nokogiri::HTML(open(WIKISEARCH + self.query))
|
@@ -46,5 +50,4 @@ class Whatsa::Scraper
|
|
46
50
|
def make_disambig
|
47
51
|
disambig? ? Whatsa::Disambig.new(self.page) : nil
|
48
52
|
end
|
49
|
-
|
50
53
|
end
|
data/lib/whatsa/section.rb
CHANGED
@@ -1,4 +1,10 @@
|
|
1
1
|
class Whatsa::Section
|
2
|
+
# I think this is a bad use of include. I feel like I _should_ make Format a
|
3
|
+
# class and use its methods that way, but I'm going to be using them so often
|
4
|
+
# that I would prefer they not be referenced as Whatsa::Format.blah_blah every
|
5
|
+
# time...
|
6
|
+
include Whatsa::Format
|
7
|
+
|
2
8
|
attr_accessor :title, :paragraphs, :article
|
3
9
|
|
4
10
|
def initialize(title, paragraphs)
|
@@ -19,11 +25,11 @@ class Whatsa::Section
|
|
19
25
|
private
|
20
26
|
|
21
27
|
def remove_citations
|
22
|
-
self.paragraphs.map! { |par| par
|
28
|
+
self.paragraphs.map! { |par| remove_citation_markers(par) }
|
23
29
|
end
|
24
30
|
|
25
31
|
def bullet_list_pars
|
26
|
-
self.paragraphs.map! { |par|
|
32
|
+
self.paragraphs.map! { |par| bulletize_lines(par) }
|
27
33
|
end
|
28
34
|
|
29
35
|
end
|
data/lib/whatsa/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: whatsa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Keegan Leitz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-12-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -90,6 +90,7 @@ files:
|
|
90
90
|
- lib/whatsa/article.rb
|
91
91
|
- lib/whatsa/cli.rb
|
92
92
|
- lib/whatsa/disambig.rb
|
93
|
+
- lib/whatsa/format.rb
|
93
94
|
- lib/whatsa/scraper.rb
|
94
95
|
- lib/whatsa/section.rb
|
95
96
|
- lib/whatsa/version.rb
|