deba 0.8.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/deba/blockquote.rb +5 -0
- data/lib/deba/extractor.rb +14 -6
- data/lib/deba/list_item.rb +5 -2
- data/lib/deba/paragraph.rb +3 -2
- data/lib/deba/stringifier.rb +12 -1
- data/lib/deba.rb +2 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5a06ceb1a911deadddfcd27e1af4fcd50516498
|
4
|
+
data.tar.gz: f72a8c85575d467632818f87baf7d66cd39099ae
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 12dadca99b049c5b89ca9a37c11990b1b150f2d59b1540e13da476a3152ea3c67c3a45400901318d3630bf2c7b668803030126828ead347636a0a33c6f231db9
|
7
|
+
data.tar.gz: b2c363d1c12cac1cfd79c05e6c545cdfc037942478b9009dc207633466bc1ab9d3de4e6f3e4a27ef2f051f3b192b2196690da5f2d315ff58b9a3952c87b6b529
|
data/lib/deba/extractor.rb
CHANGED
@@ -34,7 +34,7 @@ class Deba::Extractor
|
|
34
34
|
if @just_appended_br
|
35
35
|
@just_appended_br = false
|
36
36
|
|
37
|
-
@text_run.break(Deba::Paragraph)
|
37
|
+
@text_run.break(Deba::Paragraph, line_prefix(node))
|
38
38
|
|
39
39
|
return
|
40
40
|
else
|
@@ -67,18 +67,18 @@ class Deba::Extractor
|
|
67
67
|
if node_name == 'li'
|
68
68
|
last_item = node.xpath('count(following-sibling::li)').to_i == 0
|
69
69
|
index = node.xpath('boolean(ancestor::ol)') ? (node.xpath('count(preceding-sibling::li)').to_i + 1) : nil
|
70
|
-
@text_run.break(Deba::ListItem, last_item, index)
|
70
|
+
@text_run.break(Deba::ListItem, line_prefix(node), last_item, index)
|
71
71
|
node.children.each { |n| process(n) }
|
72
|
-
@text_run.break(Deba::Paragraph)
|
72
|
+
@text_run.break(Deba::Paragraph, line_prefix(node))
|
73
73
|
|
74
74
|
return
|
75
75
|
end
|
76
76
|
|
77
77
|
#These tags terminate the current paragraph, if present, and start a new paragraph
|
78
78
|
if BLOCK_INITIATING_TAGS.include?(node_name)
|
79
|
-
@text_run.break(Deba::Paragraph)
|
79
|
+
@text_run.break(Deba::Paragraph, line_prefix(node))
|
80
80
|
node.children.each { |n| process(n) }
|
81
|
-
@text_run.break(Deba::Paragraph)
|
81
|
+
@text_run.break(Deba::Paragraph, line_prefix(node))
|
82
82
|
|
83
83
|
return
|
84
84
|
end
|
@@ -86,7 +86,7 @@ class Deba::Extractor
|
|
86
86
|
if HEADING_TAGS.include?(node_name)
|
87
87
|
@text_run.break(Deba::Heading, node_name[1..-1].to_i)
|
88
88
|
node.children.each { |n| process(n) }
|
89
|
-
@text_run.break(Deba::Paragraph)
|
89
|
+
@text_run.break(Deba::Paragraph, line_prefix(node))
|
90
90
|
|
91
91
|
return
|
92
92
|
end
|
@@ -94,4 +94,12 @@ class Deba::Extractor
|
|
94
94
|
#Pretend that the children of this node were siblings of this node (move them one level up the tree)
|
95
95
|
node.children.each { |n| process(n) }
|
96
96
|
end
|
97
|
+
|
98
|
+
def line_prefix(node)
|
99
|
+
if node.xpath('boolean(ancestor::blockquote)')
|
100
|
+
Deba::Blockquote.new
|
101
|
+
else
|
102
|
+
nil
|
103
|
+
end
|
104
|
+
end
|
97
105
|
end
|
data/lib/deba/list_item.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
class Deba::ListItem
|
2
2
|
attr_reader :segments
|
3
3
|
|
4
|
-
def initialize(segments, last, index)
|
4
|
+
def initialize(segments, line_prefix, last, index)
|
5
5
|
@segments = segments
|
6
|
+
@line_prefix = line_prefix
|
6
7
|
@last = last
|
7
8
|
@index = index
|
8
9
|
end
|
@@ -14,6 +15,8 @@ class Deba::ListItem
|
|
14
15
|
"#{@index}. "
|
15
16
|
end
|
16
17
|
|
17
|
-
|
18
|
+
@segments.unshift(prefix)
|
19
|
+
|
20
|
+
"#{Deba::Stringifier.new(@segments, @line_prefix).stringify}\n#{"\n" if @last}"
|
18
21
|
end
|
19
22
|
end
|
data/lib/deba/paragraph.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
class Deba::Paragraph
|
2
2
|
attr_reader :segments
|
3
3
|
|
4
|
-
def initialize(segments)
|
4
|
+
def initialize(segments, line_prefix)
|
5
5
|
@segments = segments
|
6
|
+
@line_prefix = line_prefix
|
6
7
|
end
|
7
8
|
|
8
9
|
def to_s
|
9
|
-
"#{Deba::Stringifier.new(@segments).stringify}\n\n"
|
10
|
+
"#{Deba::Stringifier.new(@segments, @line_prefix).stringify}\n\n"
|
10
11
|
end
|
11
12
|
end
|
data/lib/deba/stringifier.rb
CHANGED
@@ -1,9 +1,14 @@
|
|
1
1
|
class Deba::Stringifier
|
2
|
-
def initialize(segments)
|
2
|
+
def initialize(segments, line_prefix = nil)
|
3
3
|
@segments = segments
|
4
|
+
@line_prefix = line_prefix
|
4
5
|
end
|
5
6
|
|
6
7
|
def stringify
|
8
|
+
prefix(chunkify)
|
9
|
+
end
|
10
|
+
|
11
|
+
def chunkify
|
7
12
|
chunks = @segments.chunk { |segment| segment.class }
|
8
13
|
|
9
14
|
chunks.map do |type, chunk_segments|
|
@@ -14,4 +19,10 @@ class Deba::Stringifier
|
|
14
19
|
end
|
15
20
|
end.join
|
16
21
|
end
|
22
|
+
|
23
|
+
def prefix(text)
|
24
|
+
return text if @line_prefix.nil?
|
25
|
+
|
26
|
+
text.gsub(/^/, @line_prefix.to_s)
|
27
|
+
end
|
17
28
|
end
|
data/lib/deba.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require "nokogiri"
|
2
2
|
|
3
3
|
module Deba
|
4
|
-
VERSION = "0.8.0"
|
4
|
+
VERSION = "0.9.0"
|
5
5
|
end
|
6
6
|
|
7
7
|
require "deba/utils"
|
@@ -10,6 +10,7 @@ require "deba/document"
|
|
10
10
|
require "deba/break"
|
11
11
|
require "deba/heading"
|
12
12
|
require "deba/list_item"
|
13
|
+
require "deba/blockquote"
|
13
14
|
require "deba/paragraph"
|
14
15
|
require "deba/text_runner"
|
15
16
|
require "deba/extractor"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deba
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.0
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brenton "B-Train" Fletcher
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-02-
|
11
|
+
date: 2017-02-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -71,6 +71,7 @@ files:
|
|
71
71
|
- deba.gemspec
|
72
72
|
- exe/deba
|
73
73
|
- lib/deba.rb
|
74
|
+
- lib/deba/blockquote.rb
|
74
75
|
- lib/deba/break.rb
|
75
76
|
- lib/deba/document.rb
|
76
77
|
- lib/deba/extractor.rb
|
@@ -100,7 +101,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
100
101
|
version: '0'
|
101
102
|
requirements: []
|
102
103
|
rubyforge_project:
|
103
|
-
rubygems_version: 2.
|
104
|
+
rubygems_version: 2.4.5.1
|
104
105
|
signing_key:
|
105
106
|
specification_version: 4
|
106
107
|
summary: Fillet HTML using this Deba knife to extract the juicy text content
|