deba 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/deba.rb +1 -1
- data/lib/deba/document.rb +3 -1
- data/lib/deba/extractor.rb +29 -2
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bc1db7aa56a4e3a035d8cbefe54cd49742faee24
|
4
|
+
data.tar.gz: 7d1f0a909f5675ae614792642711fdf94f865469
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 78ca7576ddd7698df8851acf773cf566fbb29ceed3748cc73d0b5844add6a6665e760adecb14207358a2217e59b9d48caa1727cd92e71fc65adaa976119f55af
|
7
|
+
data.tar.gz: b2f5628703302d6cad7c5f63a841c53a3e24c973e1d6bf37e3a8216a128c5b2d011ba346d8238b8a933d3cfa551012b8f5a5bbd5da663f207a6cf6583d9b8639
|
data/lib/deba.rb
CHANGED
data/lib/deba/document.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
class Deba::Document
|
2
|
+
BLOCKQUOTE = "> "
|
3
|
+
|
2
4
|
attr_reader :content
|
3
5
|
|
4
6
|
def initialize(extractor)
|
@@ -22,7 +24,7 @@ class Deba::Document
|
|
22
24
|
|
23
25
|
@args.unshift(@segments)
|
24
26
|
block = @block_type.new(*@args).to_a
|
25
|
-
block.unshift(
|
27
|
+
block.unshift(BLOCKQUOTE) if @extractor.in_blockquote?
|
26
28
|
|
27
29
|
@content << Deba::Stringifier.new(block).stringify
|
28
30
|
end
|
data/lib/deba/extractor.rb
CHANGED
@@ -1,7 +1,34 @@
|
|
1
1
|
class Deba::Extractor
|
2
2
|
HEADING_TAGS = %w(h1 h2 h3 h4 h5 h6)
|
3
|
-
BLOCK_INITIATING_TAGS = %w(
|
3
|
+
BLOCK_INITIATING_TAGS = %w(
|
4
|
+
address
|
5
|
+
article
|
6
|
+
aside
|
7
|
+
body
|
8
|
+
blockquote
|
9
|
+
div
|
10
|
+
dd
|
11
|
+
dl
|
12
|
+
dt
|
13
|
+
figure
|
14
|
+
footer
|
15
|
+
header
|
16
|
+
li
|
17
|
+
main
|
18
|
+
nav
|
19
|
+
ol
|
20
|
+
p
|
21
|
+
pre
|
22
|
+
section
|
23
|
+
td
|
24
|
+
th
|
25
|
+
ul)
|
4
26
|
ENHANCERS = { %w(b strong) => "*", %w(i em) => "_" }
|
27
|
+
SKIP_TAGS = %w(
|
28
|
+
head
|
29
|
+
style
|
30
|
+
script
|
31
|
+
)
|
5
32
|
|
6
33
|
attr_reader :blocks
|
7
34
|
|
@@ -27,7 +54,7 @@ class Deba::Extractor
|
|
27
54
|
|
28
55
|
node_name = node.name.downcase
|
29
56
|
|
30
|
-
return if node_name
|
57
|
+
return if SKIP_TAGS.include?(node_name)
|
31
58
|
|
32
59
|
#Handle repeated brs by making a paragraph break
|
33
60
|
if node_name == 'br'
|